[llvm-branch-commits] [llvm] [AMDGPU] Propagate Constants for Wave Reduction Intrinsics (PR #150395)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 20 02:43:31 PDT 2025


https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/150395

>From e737055bd599c78eaf5c33d69d083e589efe216a Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Thu, 24 Jul 2025 15:14:30 +0530
Subject: [PATCH] Propagate Constants for Wave Reduction Intrinsics

---
 llvm/lib/Analysis/ConstantFolding.cpp         |   14 +
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll  | 2336 +++++----------
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll  |  613 +---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll  |  613 +---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll  |  613 +---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll   |  613 +---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll  | 2620 ++++++-----------
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll |  398 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll |  541 +---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll  | 2121 +++++--------
 .../ConstProp/AMDGPU/wave.reduce.ll           |  455 ++-
 11 files changed, 3605 insertions(+), 7332 deletions(-)
 mode change 100644 => 100755 llvm/lib/Analysis/ConstantFolding.cpp

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index c14cb9e800917..3df78e96e656f
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1652,6 +1652,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::amdgcn_perm:
   case Intrinsic::amdgcn_wave_reduce_umin:
   case Intrinsic::amdgcn_wave_reduce_umax:
+  case Intrinsic::amdgcn_wave_reduce_max:
+  case Intrinsic::amdgcn_wave_reduce_min:
+  case Intrinsic::amdgcn_wave_reduce_add:
+  case Intrinsic::amdgcn_wave_reduce_sub:
+  case Intrinsic::amdgcn_wave_reduce_and:
+  case Intrinsic::amdgcn_wave_reduce_or:
+  case Intrinsic::amdgcn_wave_reduce_xor:
   case Intrinsic::amdgcn_s_wqm:
   case Intrinsic::amdgcn_s_quadmask:
   case Intrinsic::amdgcn_s_bitreplicate:
@@ -3525,6 +3532,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
       return ConstantInt::get(Ty, C0->abs());
     case Intrinsic::amdgcn_wave_reduce_umin:
     case Intrinsic::amdgcn_wave_reduce_umax:
+    case Intrinsic::amdgcn_wave_reduce_max:
+    case Intrinsic::amdgcn_wave_reduce_min:
+    case Intrinsic::amdgcn_wave_reduce_add:
+    case Intrinsic::amdgcn_wave_reduce_sub:
+    case Intrinsic::amdgcn_wave_reduce_and:
+    case Intrinsic::amdgcn_wave_reduce_or:
+    case Intrinsic::amdgcn_wave_reduce_xor:
       return dyn_cast<Constant>(Operands[0]);
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index 5f303e30c923b..a7ebf458d2591 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -200,1781 +200,1011 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT:  ; %bb.2:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT:  ; %bb.2:
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT:  ; %bb.2:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT:  ; %bb.2:
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT:    s_add_i32 s2, s2, s5
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT:  ; %bb.2:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT:    s_add_i32 s2, s2, s5
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT:  ; %bb.2:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT:    s_add_i32 s4, s4, s6
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT:  ; %bb.2:
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT:    s_add_i32 s2, s2, s5
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT:  ; %bb.2:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT:    s_add_i32 s2, s2, s5
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT:  ; %bb.2:
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: const_value:
+; GFX12DAGISEL-LABEL: divergent_value:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
 ; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX12DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX12DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX12DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX12DAGISEL-NEXT:    s_add_co_i32 s2, s2, s5
+; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX12DAGISEL-NEXT:  ; %bb.2:
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX12DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1)
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1)
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX8DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT:  ; %bb.5:
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT:  ; %bb.1: ; %else
+; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT:  ; %bb.3: ; %if
+; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: poison_value:
+; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX9DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT:  ; %bb.5:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value:
+; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT:  ; %bb.1: ; %else
+; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT:  ; %bb.3: ; %if
+; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX1064DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT:  ; %bb.5:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value:
+; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT:    s_add_i32 s1, s1, s6
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT:  ; %bb.5:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value:
+; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT:    s_add_i32 s0, s0, s6
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX1164DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT:  ; %bb.5:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value:
+; GFX1164GISEL-LABEL: divergent_cfg:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX1164GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT:    s_add_i32 s6, s6, s8
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: poison_value:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT:  ; %bb.2:
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT:  ; %bb.2:
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT:  ; %bb.2:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT:  ; %bb.2:
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT:  ; %bb.2:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT:  ; %bb.2:
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT:    s_add_i32 s2, s2, s5
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT:  ; %bb.2:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT:    s_add_i32 s2, s2, s5
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT:  ; %bb.2:
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT:  ; %bb.2:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT:    s_add_i32 s4, s4, s6
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT:  ; %bb.2:
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT:    s_add_i32 s2, s2, s5
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT:  ; %bb.2:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT:    s_add_i32 s2, s2, s5
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT:  ; %bb.2:
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: divergent_value:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX12DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX12DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX12DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX12DAGISEL-NEXT:    s_add_co_i32 s2, s2, s5
-; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX12DAGISEL-NEXT:  ; %bb.2:
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
-  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX8DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT:  ; %bb.5:
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT:  ; %bb.1: ; %else
-; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT:  ; %bb.3: ; %if
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX9DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT:  ; %bb.5:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT:  ; %bb.1: ; %else
-; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT:  ; %bb.3: ; %if
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX1064DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT:  ; %bb.5:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT:    s_add_i32 s1, s1, s6
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT:  ; %bb.5:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT:    s_add_i32 s0, s0, s6
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX1164DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT:  ; %bb.5:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT:    s_add_i32 s6, s6, s8
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT:    s_add_i32 s1, s1, s6
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT:  ; %bb.5:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT:    s_add_i32 s0, s0, s6
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: divergent_cfg:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX12DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX12DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX12DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX12DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX12DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX12DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX12DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX12DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX12DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX12DAGISEL-NEXT:    s_add_co_i32 s1, s1, s6
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX12DAGISEL-NEXT:  ; %bb.5:
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX12DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX12DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %d_cmp = icmp ult i32 %tid, 16
-  br i1 %d_cmp, label %if, label %else
-
-if:
-  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1)
-  br label %endif
-
-else:
-  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1)
-  br label %endif
-
-endif:
-  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
-  store i32 %combine, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s1, s2, s4
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, s4
-; GFX8DAGISEL-NEXT:    s_add_u32 s1, s1, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX8GISEL-NEXT:    s_add_u32 s5, s2, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s3
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX9GISEL-NEXT:    s_add_u32 s5, s2, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: uniform_value_i64:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
-; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s5, s3
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT:    s_add_i32 s1, s1, s6
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT:  ; %bb.5:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: const_value_i64:
+; GFX1132GISEL-LABEL: divergent_cfg:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT:    s_add_i32 s0, s0, s6
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
+; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: const_value_i64:
+; GFX12DAGISEL-LABEL: divergent_cfg:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX12DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX12DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX12DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX12DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s3, s4
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX12DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX12DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX12DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX12DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX12DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX12DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX12DAGISEL-NEXT:    s_add_co_i32 s1, s1, s6
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX12DAGISEL-NEXT:  ; %bb.5:
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX12DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX12DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %d_cmp = icmp ult i32 %tid, 16
+  br i1 %d_cmp, label %if, label %else
+
+if:
+  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1)
+  br label %endif
+
+else:
+  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1)
+  br label %endif
+
+endif:
+  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+  store i32 %combine, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
+; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s4, s3
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s1, s2, s4
+; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, s4
+; GFX8DAGISEL-NEXT:    s_add_u32 s1, s1, s2
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
+; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX8GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX8GISEL-NEXT:    s_add_u32 s5, s2, s3
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
+; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s3
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
+; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX9GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX9GISEL-NEXT:    s_add_u32 s5, s2, s3
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1064GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1032GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1164GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX1132GISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT:    s_add_u32 s3, s5, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: poison_value_i64:
+; GFX12DAGISEL-LABEL: uniform_value_i64:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX12DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s3, s4
+; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s5, s2, s4
+; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s5, s3
 ; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
+  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1)
   store i64 %result, ptr addrspace(1) %out
   ret void
 }
@@ -1985,7 +1215,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -1993,7 +1223,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX8DAGISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2006,7 +1236,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX8GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2014,7 +1244,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX8GISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2027,7 +1257,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2035,7 +1265,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX9DAGISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2048,7 +1278,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX9GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2056,7 +1286,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX9GISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2069,7 +1299,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2077,7 +1307,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_add_u32 s4, s4, s9
 ; GFX1064DAGISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2089,7 +1319,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2097,7 +1327,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_add_u32 s4, s4, s9
 ; GFX1064GISEL-NEXT:    s_addc_u32 s5, s5, s10
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2109,7 +1339,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s9, v3, s7
@@ -2117,7 +1347,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_add_u32 s4, s4, s8
 ; GFX1032DAGISEL-NEXT:    s_addc_u32 s5, s5, s9
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2129,7 +1359,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s9, v3, s7
@@ -2137,7 +1367,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_add_u32 s4, s4, s8
 ; GFX1032GISEL-NEXT:    s_addc_u32 s5, s5, s9
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2149,7 +1379,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s4, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s5, v2, s4
@@ -2158,7 +1388,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_add_u32 s0, s0, s5
 ; GFX1164DAGISEL-NEXT:    s_addc_u32 s1, s1, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2170,7 +1400,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s4, s[2:3]
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s5, v2, s4
@@ -2179,7 +1409,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_add_u32 s0, s0, s5
 ; GFX1164GISEL-NEXT:    s_addc_u32 s1, s1, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2191,7 +1421,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2200,7 +1430,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_add_u32 s0, s0, s4
 ; GFX1132DAGISEL-NEXT:    s_addc_u32 s1, s1, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2211,7 +1441,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2220,7 +1450,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_add_u32 s0, s0, s4
 ; GFX1132GISEL-NEXT:    s_addc_u32 s1, s1, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2235,7 +1465,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
@@ -2245,7 +1475,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX12DAGISEL-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX12DAGISEL-NEXT:  ; %bb.2:
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
@@ -2266,7 +1496,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s7, s[6:7]
@@ -2275,7 +1505,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s7
 ; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX8DAGISEL-NEXT:    s_add_u32 s7, s2, s3
-; GFX8DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -2304,7 +1534,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s7, s[6:7]
@@ -2313,10 +1543,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_mul_hi_u32 s2, s2, s7
 ; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX8GISEL-NEXT:    s_add_u32 s7, s2, s3
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2326,7 +1556,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_mul_hi_u32 s4, s4, s7
 ; GFX8GISEL-NEXT:    s_mul_i32 s5, s5, s7
 ; GFX8GISEL-NEXT:    s_add_u32 s7, s4, s5
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -2343,7 +1573,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s5, s[4:5]
@@ -2352,7 +1582,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
 ; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
 ; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s3
-; GFX9DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2380,7 +1610,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s7, s[6:7]
@@ -2389,10 +1619,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_mul_hi_u32 s2, s2, s7
 ; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX9GISEL-NEXT:    s_add_u32 s7, s2, s3
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2402,7 +1632,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_mul_hi_u32 s5, s8, s4
 ; GFX9GISEL-NEXT:    s_mul_i32 s4, s9, s4
 ; GFX9GISEL-NEXT:    s_add_u32 s7, s5, s4
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2419,7 +1649,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr8_sgpr9
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s8, s[8:9]
@@ -2428,7 +1658,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s8
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s8, s2, s8
 ; GFX1064DAGISEL-NEXT:    s_add_u32 s9, s9, s3
-; GFX1064DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[4:5]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s8
@@ -2456,7 +1686,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2465,10 +1695,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1064GISEL-NEXT:    s_add_u32 s7, s7, s3
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2478,7 +1708,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_mul_i32 s7, s7, s4
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s4
 ; GFX1064GISEL-NEXT:    s_add_u32 s7, s5, s7
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2495,7 +1725,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
@@ -2504,7 +1734,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s2, s4
 ; GFX1032DAGISEL-NEXT:    s_add_u32 s5, s5, s3
-; GFX1032DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2532,7 +1762,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s6, s6
@@ -2541,10 +1771,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1032GISEL-NEXT:    s_add_u32 s7, s7, s3
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -2554,7 +1784,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_mul_i32 s5, s7, s3
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s6, s3
 ; GFX1032GISEL-NEXT:    s_add_u32 s7, s4, s5
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2573,7 +1803,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -2583,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s8
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s8, s2, s8
 ; GFX1164DAGISEL-NEXT:    s_add_u32 s9, s9, s3
-; GFX1164DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[6:7]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s8
@@ -2615,7 +1845,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -2625,10 +1855,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1164GISEL-NEXT:    s_add_u32 s7, s7, s3
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2639,7 +1869,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_mul_i32 s5, s5, s6
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s4, s6
 ; GFX1164GISEL-NEXT:    s_add_u32 s7, s7, s5
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2658,7 +1888,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -2668,7 +1898,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX1132DAGISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1132DAGISEL-NEXT:    s_add_u32 s7, s7, s3
-; GFX1132DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -2698,7 +1928,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -2708,10 +1938,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1132GISEL-NEXT:    s_add_u32 s7, s7, s3
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -2722,7 +1952,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_mul_i32 s5, s5, s3
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s4, s3
 ; GFX1132GISEL-NEXT:    s_add_u32 s7, s7, s5
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -2740,7 +1970,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX12DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -2750,7 +1980,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s3, s6
 ; GFX12DAGISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX12DAGISEL-NEXT:    s_add_co_u32 s7, s7, s3
-; GFX12DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX12DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 08a978b0b34c1..f39dd867f9580 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -124,187 +124,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_and_b32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, -1
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_and_b32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, -1
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_and_b32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_and_b32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, -1
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_and_b32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, -1
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_and_b32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,198 +919,20 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-LABEL: divergent_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8DAGISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1291,14 +945,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1311,14 +965,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9DAGISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1331,14 +985,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1351,14 +1005,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064DAGISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1370,14 +1024,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1389,14 +1043,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1408,14 +1062,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1427,7 +1081,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -1435,7 +1089,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164DAGISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1447,7 +1101,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1467,7 +1121,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -1475,7 +1129,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -1494,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1535,19 +1189,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1584,19 +1238,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1633,19 +1287,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1682,19 +1336,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1735,19 +1389,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1786,19 +1440,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -1822,3 +1476,6 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index ace65a03a5abb..6f299ab8bb9cf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -124,187 +124,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s4, 1
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_brev_b32 s4, 1
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s4, 1
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_brev_b32 s4, 1
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s4, 1
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_brev_b32 s4, 1
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s2, 1
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_max_i32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_brev_b32 s2, 1
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_max_i32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s4, 1
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_brev_b32 s4, 1
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_max_i32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s2, 1
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_max_i32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_brev_b32 s2, 1
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_max_i32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s6, 1
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_brev_b32 s6, 1
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s6, 1
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_brev_b32 s6, 1
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s6, 1
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_brev_b32 s6, 1
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s1, 1
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_max_i32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_brev_b32 s0, 1
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_max_i32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s6, 1
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_brev_b32 s6, 1
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_max_i32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s1, 1
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_max_i32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_brev_b32 s0, 1
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_max_i32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-LABEL: divergent_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
@@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX8GISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX9GISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX1064GISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_mov_b32 s4, 0
 ; GFX1032GISEL-NEXT:    s_brev_b32 s5, 1
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s0, 0
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s1, 1
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_mov_b32 s0, 0
 ; GFX1164GISEL-NEXT:    s_brev_b32 s1, 1
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, 0
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s1, 1
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
 ; GFX1132GISEL-NEXT:    s_brev_b32 s1, 1
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -1880,3 +1534,6 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index b12537eb0cebe..3c4cbc74aedc1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -124,187 +124,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s4, -2
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_brev_b32 s4, -2
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s4, -2
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_brev_b32 s4, -2
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s4, -2
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_brev_b32 s4, -2
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s2, -2
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_min_i32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_brev_b32 s2, -2
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_min_i32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s4, -2
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_brev_b32 s4, -2
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_min_i32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s2, -2
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_min_i32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_brev_b32 s2, -2
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_min_i32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s6, -2
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_brev_b32 s6, -2
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s6, -2
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_brev_b32 s6, -2
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s6, -2
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_brev_b32 s6, -2
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s1, -2
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_min_i32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_brev_b32 s0, -2
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_min_i32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s6, -2
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_brev_b32 s6, -2
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_min_i32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s1, -2
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_min_i32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_brev_b32 s0, -2
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_min_i32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-LABEL: divergent_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
@@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX8DAGISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX8GISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX9DAGISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX9GISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX1064DAGISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX1064GISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX1032DAGISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_mov_b32 s4, -1
 ; GFX1032GISEL-NEXT:    s_brev_b32 s5, -2
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s0, -1
 ; GFX1164DAGISEL-NEXT:    s_brev_b32 s1, -2
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_mov_b32 s0, -1
 ; GFX1164GISEL-NEXT:    s_brev_b32 s1, -2
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, -1
 ; GFX1132DAGISEL-NEXT:    s_brev_b32 s1, -2
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, -1
 ; GFX1132GISEL-NEXT:    s_brev_b32 s1, -2
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -1880,3 +1534,6 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 99b998b5ed177..d6ccf7ce2831d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -124,187 +124,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_or_b32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_or_b32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_or_b32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_or_b32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_or_b32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_or_b32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_or_b32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_or_b32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_or_b32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_or_b32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,198 +919,20 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-LABEL: divergent_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8DAGISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1291,14 +945,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8GISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1311,14 +965,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9DAGISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1331,14 +985,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9GISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1351,14 +1005,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064DAGISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1370,14 +1024,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064GISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1389,14 +1043,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1408,14 +1062,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1427,7 +1081,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -1435,7 +1089,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164DAGISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1447,7 +1101,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1467,7 +1121,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -1475,7 +1129,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -1494,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1536,19 +1190,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1585,19 +1239,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1634,19 +1288,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1683,19 +1337,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1736,19 +1390,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1787,19 +1441,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -1823,3 +1477,6 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index bc8bf7f4b3b6f..fab269ea8cfb9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -215,1989 +215,1081 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, 0x7b
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s4, s2
+; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT:  ; %bb.2:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT:  ; %bb.2:
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT:  ; %bb.2:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT:  ; %bb.2:
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, s2, s5
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT:  ; %bb.2:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT:    s_sub_i32 s2, s2, s5
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT:  ; %bb.2:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT:    s_sub_i32 s4, s4, s6
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT:  ; %bb.2:
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, s2, s5
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT:  ; %bb.2:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT:    s_sub_i32 s2, s2, s5
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT:  ; %bb.2:
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: const_value:
+; GFX12DAGISEL-LABEL: divergent_value:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
 ; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s3, 0, 0x7b
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX12DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX12DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX12DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX12DAGISEL-NEXT:    s_sub_co_i32 s2, s2, s5
+; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX12DAGISEL-NEXT:  ; %bb.2:
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX12DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1)
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1)
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, s0
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s4, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
+; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX8DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT:  ; %bb.5:
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT:  ; %bb.1: ; %else
+; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
+; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s6
+; GFX8GISEL-NEXT:    s_mul_i32 s6, s3, s2
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT:  ; %bb.3: ; %if
+; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
+; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8GISEL-NEXT:    s_endpgm
+;
+; GFX9DAGISEL-LABEL: divergent_cfg:
+; GFX9DAGISEL:       ; %bb.0: ; %entry
+; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
 ; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX9DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT:  ; %bb.5:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value:
+; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT:  ; %bb.1: ; %else
+; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s6
+; GFX9GISEL-NEXT:    s_mul_i32 s6, s3, s2
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT:  ; %bb.3: ; %if
+; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
+; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX1064DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT:  ; %bb.5:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value:
+; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, s6
+; GFX1064GISEL-NEXT:    s_mul_i32 s6, s3, s2
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    s_sub_i32 s1, 0, s1
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT:    s_sub_i32 s1, s1, s6
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT:  ; %bb.5:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value:
+; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX1032GISEL-NEXT:    s_sub_i32 s0, 0, s0
+; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT:    s_sub_i32 s0, s0, s6
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX1164DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT:  ; %bb.5:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value:
+; GFX1164GISEL-LABEL: divergent_cfg:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, s6
+; GFX1164GISEL-NEXT:    s_mul_i32 s6, s3, s2
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT:    s_sub_i32 s6, s6, s8
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX1132DAGISEL-NEXT:    s_sub_i32 s1, 0, s1
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT:    s_sub_i32 s1, s1, s6
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT:  ; %bb.5:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value:
+; GFX1132GISEL-LABEL: divergent_cfg:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX1132GISEL-NEXT:    s_sub_i32 s0, 0, s0
+; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT:    s_sub_i32 s0, s0, s6
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
+; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: poison_value:
+; GFX12DAGISEL-LABEL: divergent_cfg:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX12DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX12DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX12DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s3, 0, s0
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT:  ; %bb.2:
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT:  ; %bb.2:
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT:  ; %bb.2:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT:  ; %bb.2:
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT:  ; %bb.2:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT:  ; %bb.2:
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, s2, s5
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT:  ; %bb.2:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT:    s_sub_i32 s2, s2, s5
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT:  ; %bb.2:
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT:  ; %bb.2:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT:    s_sub_i32 s4, s4, s6
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT:  ; %bb.2:
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, s2, s5
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT:  ; %bb.2:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT:    s_sub_i32 s2, s2, s5
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT:  ; %bb.2:
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: divergent_value:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX12DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX12DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX12DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s2, s2, s5
-; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX12DAGISEL-NEXT:  ; %bb.2:
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
-  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX8DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT:  ; %bb.5:
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT:  ; %bb.1: ; %else
-; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX8GISEL-NEXT:    s_mul_i32 s6, s3, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT:  ; %bb.3: ; %if
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT:  ; %bb.5:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT:  ; %bb.1: ; %else
-; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX9GISEL-NEXT:    s_mul_i32 s6, s3, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT:  ; %bb.3: ; %if
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT:  ; %bb.5:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX1064GISEL-NEXT:    s_mul_i32 s6, s3, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s1, 0, s1
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s1, s1, s6
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT:  ; %bb.5:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_sub_i32 s0, 0, s0
-; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT:    s_sub_i32 s0, s0, s6
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT:  ; %bb.5:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, s6
-; GFX1164GISEL-NEXT:    s_mul_i32 s6, s3, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT:    s_sub_i32 s6, s6, s8
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s1, 0, s1
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s1, s1, s6
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT:  ; %bb.5:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_sub_i32 s0, 0, s0
-; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT:    s_sub_i32 s0, s0, s6
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: divergent_cfg:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX12DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX12DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX12DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX12DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s1, 0, s1
-; GFX12DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX12DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX12DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX12DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX12DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX12DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s1, s1, s6
-; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
-; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX12DAGISEL-NEXT:  ; %bb.5:
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX12DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX12DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %d_cmp = icmp ult i32 %tid, 16
-  br i1 %d_cmp, label %if, label %else
-
-if:
-  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1)
-  br label %endif
-
-else:
-  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1)
-  br label %endif
-
-endif:
-  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
-  store i32 %combine, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    s_ashr_i32 s0, s4, 31
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    s_mul_i32 s1, s2, s0
-; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s4
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX8DAGISEL-NEXT:    s_add_u32 s2, s2, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s1, s2, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8GISEL-NEXT:    s_sub_i32 s5, 0, s4
-; GFX8GISEL-NEXT:    s_ashr_i32 s4, s5, 31
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s6, s2, s4
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX8GISEL-NEXT:    s_add_u32 s2, s2, s3
-; GFX8GISEL-NEXT:    s_add_u32 s5, s2, s6
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s5, 0, s4
-; GFX9DAGISEL-NEXT:    s_ashr_i32 s4, s5, 31
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s6, s2, s4
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX9DAGISEL-NEXT:    s_add_u32 s2, s2, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s6
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9GISEL-NEXT:    s_sub_i32 s5, 0, s4
-; GFX9GISEL-NEXT:    s_ashr_i32 s4, s5, 31
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s6, s2, s4
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, s5
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s5
-; GFX9GISEL-NEXT:    s_add_u32 s2, s2, s3
-; GFX9GISEL-NEXT:    s_add_u32 s5, s2, s6
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1064DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1064GISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1032DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1032GISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1164DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1164GISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_sub_i32 s4, 0, s4
-; GFX1132GISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s6, s3
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s5
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: uniform_value_i64:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s4, 0, s4
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
-; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
-; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX12DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s6, s3
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s3, s5
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX12DAGISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1064GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1032GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1164GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1132GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-;
-; GFX12DAGISEL-LABEL: const_value_i64:
-; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s2, 0, s2
-; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX12DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s4, s4, s5
-; GFX12DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s4, s3
-; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12DAGISEL-NEXT:    s_sub_co_i32 s1, 0, s1
+; GFX12DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX12DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX12DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX12DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX12DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX12DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX12DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX12DAGISEL-NEXT:    s_sub_co_i32 s1, s1, s6
+; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
+; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX12DAGISEL-NEXT:  ; %bb.5:
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX12DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX12DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; GFX12DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %d_cmp = icmp ult i32 %tid, 16
+  br i1 %d_cmp, label %if, label %else
+
+if:
+  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1)
+  br label %endif
+
+else:
+  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1)
+  br label %endif
+
+endif:
+  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+  store i32 %combine, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
+; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT:    s_ashr_i32 s0, s4, 31
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT:    s_mul_i32 s1, s2, s0
+; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s4
+; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX8DAGISEL-NEXT:    s_add_u32 s2, s2, s3
+; GFX8DAGISEL-NEXT:    s_add_u32 s1, s2, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT:    s_ashr_i32 s2, s3, 31
+; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8GISEL-NEXT:    s_sub_i32 s5, 0, s4
+; GFX8GISEL-NEXT:    s_ashr_i32 s4, s5, 31
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT:    s_mul_i32 s6, s2, s4
+; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX8GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX8GISEL-NEXT:    s_add_u32 s2, s2, s3
+; GFX8GISEL-NEXT:    s_add_u32 s5, s2, s6
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
+; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9DAGISEL-NEXT:    s_sub_i32 s5, 0, s4
+; GFX9DAGISEL-NEXT:    s_ashr_i32 s4, s5, 31
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9DAGISEL-NEXT:    s_mul_i32 s6, s2, s4
+; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX9DAGISEL-NEXT:    s_add_u32 s2, s2, s3
+; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s6
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9GISEL-NEXT:    s_ashr_i32 s2, s3, 31
+; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9GISEL-NEXT:    s_sub_i32 s5, 0, s4
+; GFX9GISEL-NEXT:    s_ashr_i32 s4, s5, 31
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT:    s_mul_i32 s6, s2, s4
+; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, s5
+; GFX9GISEL-NEXT:    s_mul_hi_u32 s2, s2, s5
+; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s5
+; GFX9GISEL-NEXT:    s_add_u32 s2, s2, s3
+; GFX9GISEL-NEXT:    s_add_u32 s5, s2, s6
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1064DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1064DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064GISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1064GISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1064GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1064GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1064GISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1064GISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1032DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1032DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1032DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1032GISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1032GISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1032GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1032GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1032GISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1032GISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1164DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1164DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1164DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1164GISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1164GISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1164GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1164GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1164GISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1164GISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
+; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1132DAGISEL-NEXT:    s_sub_i32 s4, 0, s4
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1132DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1132DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX1132GISEL-NEXT:    s_sub_i32 s4, 0, s4
+; GFX1132GISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1132GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX1132GISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX1132GISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX1132GISEL-NEXT:    s_add_u32 s3, s6, s3
+; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s5
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 ;
-; GFX12DAGISEL-LABEL: poison_value_i64:
+; GFX12DAGISEL-LABEL: uniform_value_i64:
 ; GFX12DAGISEL:       ; %bb.0: ; %entry
-; GFX12DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX12DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX12DAGISEL-NEXT:    s_sub_co_i32 s2, 0, s2
+; GFX12DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX12DAGISEL-NEXT:    s_sub_co_i32 s4, 0, s4
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX12DAGISEL-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s4, s4, s5
-; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s4, s3
+; GFX12DAGISEL-NEXT:    s_mul_hi_u32 s6, s2, s4
+; GFX12DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
+; GFX12DAGISEL-NEXT:    s_mul_i32 s5, s2, s5
+; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s6, s3
+; GFX12DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s3, s5
 ; GFX12DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX12DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX12DAGISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
+  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1)
   store i64 %result, ptr addrspace(1) %out
   ret void
 }
@@ -2208,7 +1300,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2216,7 +1308,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX8DAGISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2229,7 +1321,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX8GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2237,7 +1329,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX8GISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2250,7 +1342,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2258,7 +1350,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX9DAGISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2271,7 +1363,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX9GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2279,7 +1371,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s8
 ; GFX9GISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2292,7 +1384,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2300,7 +1392,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_sub_u32 s4, s4, s9
 ; GFX1064DAGISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2312,7 +1404,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s8, s[6:7]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s9, v2, s8
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s10, v3, s8
@@ -2320,7 +1412,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_sub_u32 s4, s4, s9
 ; GFX1064GISEL-NEXT:    s_subb_u32 s5, s5, s10
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2332,7 +1424,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s9, v3, s7
@@ -2340,7 +1432,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_sub_u32 s4, s4, s8
 ; GFX1032DAGISEL-NEXT:    s_subb_u32 s5, s5, s9
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2352,7 +1444,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s9, v3, s7
@@ -2360,7 +1452,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_sub_u32 s4, s4, s8
 ; GFX1032GISEL-NEXT:    s_subb_u32 s5, s5, s9
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2372,7 +1464,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s4, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s5, v2, s4
@@ -2381,7 +1473,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_sub_u32 s0, s0, s5
 ; GFX1164DAGISEL-NEXT:    s_subb_u32 s1, s1, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2393,7 +1485,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s4, s[2:3]
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s5, v2, s4
@@ -2402,7 +1494,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_sub_u32 s0, s0, s5
 ; GFX1164GISEL-NEXT:    s_subb_u32 s1, s1, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2414,7 +1506,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2423,7 +1515,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_sub_u32 s0, s0, s4
 ; GFX1132DAGISEL-NEXT:    s_subb_u32 s1, s1, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2434,7 +1526,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2443,7 +1535,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_sub_u32 s0, s0, s4
 ; GFX1132GISEL-NEXT:    s_subb_u32 s1, s1, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2458,7 +1550,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX12DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
@@ -2468,7 +1560,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
 ; GFX12DAGISEL-NEXT:    s_sub_nc_u64 s[0:1], s[0:1], s[4:5]
-; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX12DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX12DAGISEL-NEXT:  ; %bb.2:
 ; GFX12DAGISEL-NEXT:    s_wait_alu 0xfffe
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
@@ -2489,7 +1581,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2502,13 +1594,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX8DAGISEL-NEXT:    s_add_u32 s2, s2, s3
 ; GFX8DAGISEL-NEXT:    s_add_u32 s7, s2, s10
-; GFX8DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[2:3]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2522,7 +1614,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    s_add_u32 s7, s4, s8
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s7
-; GFX8DAGISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -2536,7 +1628,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2549,10 +1641,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX8GISEL-NEXT:    s_add_u32 s2, s2, s3
 ; GFX8GISEL-NEXT:    s_add_u32 s7, s2, s10
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2566,7 +1658,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_mul_i32 s5, s5, s7
 ; GFX8GISEL-NEXT:    s_add_u32 s4, s4, s5
 ; GFX8GISEL-NEXT:    s_add_u32 s7, s4, s8
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -2583,7 +1675,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
@@ -2596,13 +1688,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s5
 ; GFX9DAGISEL-NEXT:    s_add_u32 s2, s2, s3
 ; GFX9DAGISEL-NEXT:    s_add_u32 s5, s2, s10
-; GFX9DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[2:3]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
@@ -2616,7 +1708,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    s_add_u32 s5, s5, s8
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2629,7 +1721,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2642,10 +1734,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s7
 ; GFX9GISEL-NEXT:    s_add_u32 s2, s2, s3
 ; GFX9GISEL-NEXT:    s_add_u32 s7, s2, s10
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2659,7 +1751,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_mul_i32 s5, s8, s5
 ; GFX9GISEL-NEXT:    s_add_u32 s4, s7, s4
 ; GFX9GISEL-NEXT:    s_add_u32 s7, s4, s5
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2676,7 +1768,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr8_sgpr9
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s8, s[8:9]
@@ -2689,7 +1781,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s10, s3
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s8, s2, s8
 ; GFX1064DAGISEL-NEXT:    s_add_u32 s9, s3, s9
-; GFX1064DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[4:5]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s8
@@ -2721,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2734,10 +1826,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_add_u32 s3, s10, s3
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1064GISEL-NEXT:    s_add_u32 s7, s3, s7
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2751,7 +1843,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_add_u32 s7, s8, s7
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s4
 ; GFX1064GISEL-NEXT:    s_add_u32 s7, s7, s5
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2768,7 +1860,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
@@ -2781,7 +1873,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s9, s3
 ; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s2, s4
 ; GFX1032DAGISEL-NEXT:    s_add_u32 s5, s3, s5
-; GFX1032DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2813,7 +1905,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s6, s6
@@ -2826,10 +1918,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_add_u32 s3, s9, s3
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1032GISEL-NEXT:    s_add_u32 s7, s3, s7
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -2843,7 +1935,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_add_u32 s5, s5, s7
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s6, s3
 ; GFX1032GISEL-NEXT:    s_add_u32 s7, s5, s4
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2862,7 +1954,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2877,7 +1969,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s10, s3
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s8, s2, s8
 ; GFX1164DAGISEL-NEXT:    s_add_u32 s9, s3, s9
-; GFX1164DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[6:7]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s8
@@ -2913,7 +2005,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2928,10 +2020,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_add_u32 s3, s10, s3
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1164GISEL-NEXT:    s_add_u32 s7, s3, s7
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2947,7 +2039,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_add_u32 s5, s8, s5
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s4, s6
 ; GFX1164GISEL-NEXT:    s_add_u32 s7, s5, s7
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2966,7 +2058,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2981,7 +2073,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s9, s3
 ; GFX1132DAGISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1132DAGISEL-NEXT:    s_add_u32 s7, s3, s7
-; GFX1132DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -3016,7 +2108,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3031,10 +2123,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_add_u32 s3, s9, s3
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX1132GISEL-NEXT:    s_add_u32 s7, s3, s7
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -3050,7 +2142,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_add_u32 s5, s7, s5
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s4, s3
 ; GFX1132GISEL-NEXT:    s_add_u32 s7, s5, s8
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -3068,7 +2160,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX12DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX12DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX12DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX12DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX12DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -3083,7 +2175,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX12DAGISEL-NEXT:    s_add_co_u32 s3, s9, s3
 ; GFX12DAGISEL-NEXT:    s_mul_i32 s6, s2, s6
 ; GFX12DAGISEL-NEXT:    s_add_co_u32 s7, s3, s7
-; GFX12DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX12DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX12DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX12DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX12DAGISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 1f848d577d2a4..54c8e2e248f57 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -125,153 +125,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_max_u32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_max_u32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_max_u32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_max_u32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_max_u32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_max_u32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_max_u32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_max_u32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_max_u32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_max_u32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1200,7 +1066,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1211,7 +1077,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1224,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1235,7 +1101,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1248,7 +1114,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1259,7 +1125,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1272,7 +1138,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1283,7 +1149,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1296,7 +1162,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1307,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1319,7 +1185,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1330,7 +1196,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1342,7 +1208,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1353,7 +1219,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1365,7 +1231,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1376,7 +1242,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1388,7 +1254,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1400,7 +1266,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1412,7 +1278,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1424,7 +1290,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1436,7 +1302,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1447,7 +1313,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1458,7 +1324,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1469,7 +1335,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB6_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1510,19 +1376,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1559,19 +1425,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1608,19 +1474,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1657,19 +1523,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1710,19 +1576,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1761,19 +1627,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB7_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB7_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB7_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB7_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index c2cfb8828c30c..502ef84449751 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -125,153 +125,19 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8DAGISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX8GISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9DAGISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX9GISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064DAGISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1064GISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032DAGISEL-NEXT:    s_min_u32 s2, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1032GISEL-NEXT:    s_min_u32 s2, s2, s5
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164DAGISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s4, -1
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
 ; GFX1164GISEL-NEXT:    s_min_u32 s4, s4, s6
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
@@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132DAGISEL-NEXT:    s_min_u32 s2, s2, s5
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, -1
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
 ; GFX1132GISEL-NEXT:    s_min_u32 s2, s2, s5
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8DAGISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX8DAGISEL-NEXT:  ; %bb.5:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
@@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX8GISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
@@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9DAGISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX9DAGISEL-NEXT:  ; %bb.5:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX9GISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064DAGISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1064DAGISEL-NEXT:  ; %bb.5:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1064GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1064GISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, -1
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032DAGISEL-NEXT:    s_min_u32 s1, s1, s6
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1032DAGISEL-NEXT:  ; %bb.5:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1032GISEL-NEXT:    s_mov_b32 s0, -1
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1032GISEL-NEXT:    s_min_u32 s0, s0, s6
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
@@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164DAGISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1164DAGISEL-NEXT:  ; %bb.5:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164GISEL-NEXT:    s_mov_b32 s6, -1
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
 ; GFX1164GISEL-NEXT:    s_min_u32 s6, s6, s8
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
 ; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, -1
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_min_u32 s1, s1, s6
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
 ; GFX1132DAGISEL-NEXT:  ; %bb.5:
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
 ; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
@@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
 ; GFX1132GISEL-NEXT:    s_mov_b32 s0, -1
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_min_u32 s0, s0, s6
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1054,153 +920,13 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-LABEL: divergent_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1211,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1224,7 +950,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1235,7 +961,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX8GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1248,7 +974,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1259,7 +985,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1272,7 +998,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1283,7 +1009,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX9GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1296,7 +1022,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1307,7 +1033,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1319,7 +1045,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s12, s[6:7]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1330,7 +1056,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s12
 ; GFX1064GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1342,7 +1068,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1353,7 +1079,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1365,7 +1091,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], -1
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v5, s5
@@ -1376,7 +1102,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_cselect_b64 s[4:5], s[8:9], s[4:5]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1388,7 +1114,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1400,7 +1126,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1412,7 +1138,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s8, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v4, s0
@@ -1424,7 +1150,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s8
 ; GFX1164GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -1436,7 +1162,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1447,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1458,7 +1184,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], -1
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
@@ -1469,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_cselect_b64 s[0:1], s[4:5], s[0:1]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -1510,19 +1236,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -1559,19 +1285,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1608,19 +1334,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1657,19 +1383,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1710,19 +1436,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -1761,19 +1487,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
@@ -1797,3 +1523,6 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index be432c00de728..d5f1750c268ab 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -200,1494 +200,773 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX8GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT:  ; %bb.2:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX9GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT:  ; %bb.2:
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
 ; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT:  ; %bb.2:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT:  ; %bb.2:
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
 ; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT:    s_xor_b32 s2, s2, s5
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT:  ; %bb.2:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT:    s_xor_b32 s2, s2, s5
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT:  ; %bb.2:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT:  ; %bb.2:
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT:    s_xor_b32 s2, s2, s5
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT:  ; %bb.2:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT:    s_xor_b32 s2, s2, s5
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT:  ; %bb.2:
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1)
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1)
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX8DAGISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX8DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT:  ; %bb.5:
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT:  ; %bb.1: ; %else
+; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX8GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX8GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT:  ; %bb.3: ; %if
+; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX8GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: poison_value:
+; GFX9DAGISEL-LABEL: divergent_cfg:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX9DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX9DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT:  ; %bb.5:
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value:
+; GFX9GISEL-LABEL: divergent_cfg:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT:  ; %bb.1: ; %else
+; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX9GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX9GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT:  ; %bb.3: ; %if
+; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX9GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX1064DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT:  ; %bb.5:
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value:
+; GFX1064GISEL-LABEL: divergent_cfg:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
 ; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX1064GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT:    s_xor_b32 s1, s1, s6
+; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT:  ; %bb.5:
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value:
+; GFX1032GISEL-LABEL: divergent_cfg:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
 ; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
 ; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT:    s_xor_b32 s0, s0, s6
+; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
+; GFX1164DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT:  ; %bb.2:
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT:  ; %bb.2:
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT:  ; %bb.2:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT:  ; %bb.2:
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT:  ; %bb.2:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT:  ; %bb.2:
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT:    s_xor_b32 s2, s2, s5
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT:  ; %bb.2:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT:    s_xor_b32 s2, s2, s5
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT:  ; %bb.2:
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT:    s_xor_b32 s4, s4, s6
+; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT:  ; %bb.2:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT:    s_xor_b32 s4, s4, s6
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT:  ; %bb.2:
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT:    s_xor_b32 s2, s2, s5
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT:  ; %bb.2:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT:    s_xor_b32 s2, s2, s5
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT:  ; %bb.2:
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
-  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1)
-  store i32 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX8DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT:  ; %bb.5:
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT:  ; %bb.1: ; %else
-; GFX8GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT:  ; %bb.3: ; %if
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX9DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT:  ; %bb.5:
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT:  ; %bb.1: ; %else
-; GFX9GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT:  ; %bb.3: ; %if
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX1064DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT:  ; %bb.5:
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1064GISEL-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT:    s_xor_b32 s1, s1, s6
-; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT:  ; %bb.5:
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT:    s_xor_b32 s0, s0, s6
-; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s6, s2
-; GFX1164DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT:  ; %bb.5:
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s6, s6, s2
-; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT:    s_xor_b32 s6, s6, s8
-; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT:    s_xor_b32 s1, s1, s6
-; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT:  ; %bb.5:
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
-; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT:    s_xor_b32 s0, s0, s6
-; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %tid = call i32 @llvm.amdgcn.workitem.id.x()
-  %d_cmp = icmp ult i32 %tid, 16
-  br i1 %d_cmp, label %if, label %else
-
-if:
-  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1)
-  br label %endif
-
-else:
-  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1)
-  br label %endif
-
-endif:
-  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
-  store i32 %combine, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    s_mul_i32 s1, s3, s4
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s4, s4, 1
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
-entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1)
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT:    s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT:    s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT:    s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT:    s_endpgm
-;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT:  ; %bb.5:
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: const_value_i64:
+; GFX1164GISEL-LABEL: divergent_cfg:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1164GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x2c
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164GISEL-NEXT:    s_mul_i32 s6, s6, s2
+; GFX1164GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
+; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT:    s_xor_b32 s6, s6, s8
+; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: const_value_i64:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT:    s_xor_b32 s1, s1, s6
+; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT:  ; %bb.5:
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT:  .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: const_value_i64:
+; GFX1132GISEL-LABEL: divergent_cfg:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
+; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, 0
+; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT:    s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT:  .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
+; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT:  .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT:    s_xor_b32 s0, s0, s6
+; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT:  .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
 ; GFX1132GISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1)
-  store i64 %result, ptr addrspace(1) %out
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %d_cmp = icmp ult i32 %tid, 16
+  br i1 %d_cmp, label %if, label %else
+
+if:
+  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1)
+  br label %endif
+
+else:
+  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1)
+  br label %endif
+
+endif:
+  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+  store i32 %combine, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX8DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT:    s_mul_i32 s1, s3, s4
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8DAGISEL-NEXT:    s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX8GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX8GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
@@ -1695,159 +974,159 @@ define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX9DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX9GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX9GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
 ; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
 ; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1064GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1064GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
 ; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1032DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
 ; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1032GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1032GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX1032GISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1164DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1164GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1164GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
+; GFX1132DAGISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s3, s2, 1
+; GFX1132GISEL-NEXT:    s_and_b32 s4, s4, 1
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s1, s3
+; GFX1132GISEL-NEXT:    s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT:    s_mul_i32 s3, s3, s4
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
 entry:
-  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
+  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1)
   store i64 %result, ptr addrspace(1) %out
   ret void
 }
@@ -1858,14 +1137,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8DAGISEL-NEXT:  ; %bb.2:
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1878,14 +1157,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX8GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX8GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX8GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX8GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX8GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX8GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX8GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX8GISEL-NEXT:  ; %bb.2:
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1898,14 +1177,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9DAGISEL-NEXT:  ; %bb.2:
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1918,14 +1197,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX9GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX9GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX9GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX9GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX9GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX9GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX9GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX9GISEL-NEXT:  ; %bb.2:
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1938,14 +1217,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064DAGISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064DAGISEL-NEXT:  ; %bb.2:
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1957,14 +1236,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1064GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GFX1064GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v2, s10
 ; GFX1064GISEL-NEXT:    v_readlane_b32 s9, v3, s10
 ; GFX1064GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1064GISEL-NEXT:  ; %bb.2:
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1976,14 +1255,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032DAGISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032DAGISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032DAGISEL-NEXT:  ; %bb.2:
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -1995,14 +1274,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1032GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
-; GFX1032GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s7, s6
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s8, v2, s7
 ; GFX1032GISEL-NEXT:    v_readlane_b32 s9, v3, s7
 ; GFX1032GISEL-NEXT:    s_bitset0_b32 s6, s7
 ; GFX1032GISEL-NEXT:    s_xor_b64 s[4:5], s[4:5], s[8:9]
 ; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1032GISEL-NEXT:  ; %bb.2:
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v3, s5
@@ -2014,7 +1293,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164DAGISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -2022,7 +1301,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164DAGISEL-NEXT:  ; %bb.2:
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2034,7 +1313,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s6, s[2:3]
 ; GFX1164GISEL-NEXT:    v_readlane_b32 s4, v2, s6
@@ -2042,7 +1321,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s6
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1164GISEL-NEXT:  ; %bb.2:
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, s0
@@ -2054,7 +1333,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132DAGISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2062,7 +1341,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132DAGISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132DAGISEL-NEXT:  ; %bb.2:
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132DAGISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2073,7 +1352,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mov_b64 s[0:1], 0
 ; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
 ; GFX1132GISEL-NEXT:    v_readlane_b32 s4, v2, s3
@@ -2081,7 +1360,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
 ; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
 ; GFX1132GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
 ; GFX1132GISEL-NEXT:  ; %bb.2:
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
 ; GFX1132GISEL-NEXT:    global_store_b64 v[0:1], v[2:3], off
@@ -2101,7 +1380,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2109,7 +1388,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX8DAGISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX8DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -2137,7 +1416,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX8GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2145,10 +1424,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX8GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX8GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX8GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
 ; GFX8GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2157,7 +1436,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    s_mul_i32 s6, s4, s7
 ; GFX8GISEL-NEXT:    s_mul_i32 s7, s5, s7
-; GFX8GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX8GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
@@ -2174,7 +1453,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
@@ -2182,7 +1461,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
 ; GFX9DAGISEL-NEXT:    s_mul_i32 s5, s3, s5
-; GFX9DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2209,7 +1488,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX9GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2217,10 +1496,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX9GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX9GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX9GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX9GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2229,7 +1508,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    s_mul_i32 s6, s6, s4
 ; GFX9GISEL-NEXT:    s_mul_i32 s7, s7, s4
-; GFX9GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX9GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2246,7 +1525,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064DAGISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
@@ -2254,7 +1533,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
 ; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s3, s5
-; GFX1064DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2281,7 +1560,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
 ; GFX1064GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
@@ -2289,10 +1568,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1064GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1064GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1064GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1064GISEL-NEXT:    s_mov_b64 s[4:5], exec
@@ -2301,7 +1580,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064GISEL-NEXT:    s_mul_i32 s6, s6, s4
 ; GFX1064GISEL-NEXT:    s_mul_i32 s7, s7, s4
-; GFX1064GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2318,7 +1597,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032DAGISEL-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s4, s4
@@ -2326,7 +1605,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s2, s5
 ; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s3, s5
-; GFX1032DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -2353,7 +1632,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr6_sgpr7
 ; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GFX1032GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s6, s6
@@ -2361,10 +1640,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1032GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1032GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1032GISEL-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
 ; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -2373,7 +1652,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032GISEL-NEXT:    s_mul_i32 s6, s6, s3
 ; GFX1032GISEL-NEXT:    s_mul_i32 s7, s7, s3
-; GFX1032GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2392,7 +1671,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2401,7 +1680,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1164DAGISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1164DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[2:3], s[8:9]
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s6
@@ -2432,7 +1711,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1164GISEL-NEXT:    s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2441,10 +1720,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1164GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1164GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1164GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1164GISEL-NEXT:    s_mov_b64 s[6:7], exec
@@ -2454,7 +1733,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    s_mul_i32 s6, s4, s7
 ; GFX1164GISEL-NEXT:    s_mul_i32 s7, s5, s7
-; GFX1164GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s7
@@ -2473,7 +1752,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2482,7 +1761,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1132DAGISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1132DAGISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s2, s8
 ; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -2512,7 +1791,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
 ; GFX1132GISEL-NEXT:    s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132GISEL-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2521,10 +1800,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s2, s7
 ; GFX1132GISEL-NEXT:    s_mul_i32 s7, s3, s7
-; GFX1132GISEL-NEXT:  .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT:  .LBB5_2: ; %Flow
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB5_4
 ; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
 ; GFX1132GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
 ; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
@@ -2534,7 +1813,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    s_mul_i32 s6, s4, s3
 ; GFX1132GISEL-NEXT:    s_mul_i32 s7, s5, s3
-; GFX1132GISEL-NEXT:  .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT:  .LBB5_4: ; %endif
 ; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
 ; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
index 863598578ea77..a27a121a3af61 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
@@ -2,13 +2,14 @@
 ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
 
 ; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umin
 ; --------------------------------------------------------------------
 
 declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umin.i64(i64, i32 immarg)
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_poison(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -19,8 +20,8 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -31,14 +32,94 @@ entry:
   ret void
 }
 
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.min
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.min.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.min.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
 ; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umax
 ; --------------------------------------------------------------------
 
 declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umax.i64(i64, i32 immarg)
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_poison(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -49,8 +130,8 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -61,6 +142,30 @@ entry:
   ret void
 }
 
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
 @gv = constant i32 0
 define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv(
@@ -74,3 +179,333 @@ entry:
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.max
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.max.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.max.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.add
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.add.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.add.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.sub
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.sub.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.sub.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.and
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.and.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.and.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.or
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.or.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.or.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.xor
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.xor.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.xor.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}



More information about the llvm-branch-commits mailing list