[llvm-branch-commits] [llvm] [AMDGPU] Propagate Constants for Wave Reduction Intrinsics (PR #150395)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Aug 6 23:27:29 PDT 2025
https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/150395
From 92fb3dc5d0c6d7463c52c5b05288d2a58f1dbde2 Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Thu, 24 Jul 2025 15:14:30 +0530
Subject: [PATCH] Propagate Constants for Wave Reduction Intrinsics
---
llvm/lib/Analysis/ConstantFolding.cpp | 14 +
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll | 2165 +++++----------
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll | 613 +----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll | 613 +----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll | 613 +----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll | 613 +----
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll | 2393 ++++++-----------
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 493 +---
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 541 +---
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll | 2121 +++++----------
.../ConstProp/AMDGPU/wave.reduce.ll | 455 +++-
11 files changed, 3448 insertions(+), 7186 deletions(-)
mode change 100644 => 100755 llvm/lib/Analysis/ConstantFolding.cpp
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index e71ba5ea5521e..11d22c75831d0
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
case Intrinsic::amdgcn_s_wqm:
case Intrinsic::amdgcn_s_quadmask:
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantInt::get(Ty, C0->abs());
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
return dyn_cast<Constant>(Operands[0]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index b6af8b4bb798d..f5e4060e6e623 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -187,1644 +187,931 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: s_mov_b32 s4, 0
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
+; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1)
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1)
store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT: ; %bb.5:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT: ; %bb.1: ; %else
+; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT: ; %bb.3: ; %if
+; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, 0
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
+; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value:
+; GFX9DAGISEL-LABEL: divergent_cfg:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2
+; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT: ; %bb.2:
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT: ; %bb.2:
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT: ; %bb.2:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT: ; %bb.2:
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT: ; %bb.2:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT: ; %bb.2:
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT: s_add_i32 s2, s2, s5
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT: ; %bb.2:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT: s_add_i32 s2, s2, s5
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT: ; %bb.2:
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT: ; %bb.2:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT: s_add_i32 s4, s4, s6
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT: ; %bb.2:
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT: s_add_i32 s2, s2, s5
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT: s_add_i32 s2, s2, s5
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT: ; %bb.2:
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %id.x, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT: ; %bb.5:
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT: ; %bb.1: ; %else
-; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT: ; %bb.3: ; %if
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
-; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT: ; %bb.5:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT: ; %bb.1: ; %else
-; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT: ; %bb.3: ; %if
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
-; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT: ; %bb.5:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT: ; %bb.1: ; %else
-; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT: ; %bb.3: ; %if
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT: ; %bb.5:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT: ; %bb.1: ; %else
-; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT: ; %bb.3: ; %if
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT: ; %bb.5:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT: ; %bb.1: ; %else
-; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT: ; %bb.3: ; %if
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT: ; %bb.5:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT: ; %bb.1: ; %else
-; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT: ; %bb.3: ; %if
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- %d_cmp = icmp ult i32 %tid, 16
- br i1 %d_cmp, label %if, label %else
-
-if:
- %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1)
- br label %endif
-
-else:
- %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1)
- br label %endif
-
-endif:
- %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
- store i32 %combine, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4
-; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT: ; %bb.5:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: const_value_i64:
+; GFX9GISEL-LABEL: divergent_cfg:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT: ; %bb.1: ; %else
+; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT: ; %bb.3: ; %if
+; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, 0
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
+; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value_i64:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT: ; %bb.5:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: const_value_i64:
+; GFX1064GISEL-LABEL: divergent_cfg:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT: ; %bb.1: ; %else
+; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT: ; %bb.3: ; %if
+; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: const_value_i64:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT: s_add_i32 s1, s1, s6
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT: ; %bb.5:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: const_value_i64:
+; GFX1032GISEL-LABEL: divergent_cfg:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT: ; %bb.1: ; %else
+; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT: ; %bb.3: ; %if
+; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT: s_add_i32 s0, s0, s6
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: const_value_i64:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT: ; %bb.5:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: const_value_i64:
+; GFX1164GISEL-LABEL: divergent_cfg:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT: ; %bb.1: ; %else
+; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT: ; %bb.3: ; %if
+; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT: s_add_i32 s6, s6, s8
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: const_value_i64:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT: s_add_i32 s1, s1, s6
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT: ; %bb.5:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value_i64:
+; GFX1132GISEL-LABEL: divergent_cfg:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT: ; %bb.1: ; %else
+; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s4, s2, 0
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT: ; %bb.3: ; %if
+; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT: s_add_i32 s0, s0, s6
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %d_cmp = icmp ult i32 %tid, 16
+ br i1 %d_cmp, label %if, label %else
+
+if:
+ %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %tid, i32 1)
+ br label %endif
+
+else:
+ %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %in, i32 1)
+ br label %endif
+
+endif:
+ %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+ store i32 %combine, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
+; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT: s_mul_hi_u32 s1, s2, s4
+; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s4
+; GFX8DAGISEL-NEXT: s_add_u32 s1, s1, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
+; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s0, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX8GISEL-NEXT: s_add_u32 s5, s2, s3
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
+; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
+; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT: s_mul_hi_u32 s4, s0, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX9GISEL-NEXT: s_add_u32 s5, s2, s3
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1064GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1032GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1164GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s3, s0, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s4, s1, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s4
+; GFX1132GISEL-NEXT: s_mul_hi_u32 s5, s2, s4
+; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT: s_add_u32 s3, s5, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
+ %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %in, i32 1)
store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -1836,7 +1123,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1844,7 +1131,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX8DAGISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1858,7 +1145,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1866,7 +1153,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX8GISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1880,7 +1167,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1888,7 +1175,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX9DAGISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1902,7 +1189,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1910,7 +1197,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX9GISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1924,7 +1211,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1932,7 +1219,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s9
; GFX1064DAGISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1945,7 +1232,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -1953,7 +1240,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s9
; GFX1064GISEL-NEXT: s_addc_u32 s5, s5, s10
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1966,7 +1253,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
@@ -1974,7 +1261,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s8
; GFX1032DAGISEL-NEXT: s_addc_u32 s5, s5, s9
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1987,7 +1274,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
@@ -1995,7 +1282,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s8
; GFX1032GISEL-NEXT: s_addc_u32 s5, s5, s9
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2008,7 +1295,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4
@@ -2017,7 +1304,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_add_u32 s0, s0, s5
; GFX1164DAGISEL-NEXT: s_addc_u32 s1, s1, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2030,7 +1317,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4
@@ -2039,7 +1326,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_add_u32 s0, s0, s5
; GFX1164GISEL-NEXT: s_addc_u32 s1, s1, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2052,7 +1339,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2061,7 +1348,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_add_u32 s0, s0, s4
; GFX1132DAGISEL-NEXT: s_addc_u32 s1, s1, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2073,7 +1360,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2082,7 +1369,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_add_u32 s0, s0, s4
; GFX1132GISEL-NEXT: s_addc_u32 s1, s1, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2102,7 +1389,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7]
@@ -2111,7 +1398,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s7
; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s3
-; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -2140,7 +1427,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7]
@@ -2149,10 +1436,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s7
; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX8GISEL-NEXT: s_add_u32 s7, s2, s3
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2162,7 +1449,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_mul_hi_u32 s4, s4, s7
; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7
; GFX8GISEL-NEXT: s_add_u32 s7, s4, s5
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -2179,7 +1466,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s5, s[4:5]
@@ -2188,7 +1475,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5
; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s3
-; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2216,7 +1503,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s7, s[6:7]
@@ -2225,10 +1512,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s7
; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX9GISEL-NEXT: s_add_u32 s7, s2, s3
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2238,7 +1525,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s8, s4
; GFX9GISEL-NEXT: s_mul_i32 s4, s9, s4
; GFX9GISEL-NEXT: s_add_u32 s7, s5, s4
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2255,7 +1542,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9]
@@ -2264,7 +1551,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s8
; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8
; GFX1064DAGISEL-NEXT: s_add_u32 s9, s9, s3
-; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8
@@ -2292,7 +1579,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2301,10 +1588,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s6
; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s3
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2314,7 +1601,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4
; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4
; GFX1064GISEL-NEXT: s_add_u32 s7, s5, s7
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2331,7 +1618,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
@@ -2340,7 +1627,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4
; GFX1032DAGISEL-NEXT: s_add_u32 s5, s5, s3
-; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2368,7 +1655,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6
@@ -2377,10 +1664,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s6
; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1032GISEL-NEXT: s_add_u32 s7, s7, s3
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2390,7 +1677,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_mul_i32 s5, s7, s3
; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3
; GFX1032GISEL-NEXT: s_add_u32 s7, s4, s5
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2409,7 +1696,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -2419,7 +1706,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s8
; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8
; GFX1164DAGISEL-NEXT: s_add_u32 s9, s9, s3
-; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8
@@ -2451,7 +1738,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -2461,10 +1748,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s6
; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s3
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2475,7 +1762,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_mul_i32 s5, s5, s6
; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6
; GFX1164GISEL-NEXT: s_add_u32 s7, s7, s5
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2494,7 +1781,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -2504,7 +1791,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s6
; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1132DAGISEL-NEXT: s_add_u32 s7, s7, s3
-; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -2534,7 +1821,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -2544,10 +1831,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s6
; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s3
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2558,7 +1845,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_mul_i32 s5, s5, s3
; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3
; GFX1132GISEL-NEXT: s_add_u32 s7, s7, s5
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 55e6189f65675..87642a1f0b957 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -124,187 +124,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_and_b32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_and_b32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, -1
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, -1
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_and_b32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_and_b32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_and_b32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_and_b32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1272,14 +926,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1293,14 +947,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1314,14 +968,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1335,14 +989,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1356,14 +1010,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1376,14 +1030,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1396,14 +1050,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1416,14 +1070,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, -1
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1436,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1457,7 +1111,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, -1
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -1465,7 +1119,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1478,7 +1132,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1498,7 +1152,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1506,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1547,19 +1201,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1596,19 +1250,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1645,19 +1299,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1694,19 +1348,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1747,19 +1401,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1798,19 +1452,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1834,3 +1488,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index 96b67e71fcd28..50da8b34f8555 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -124,187 +124,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s4, 1
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_max_i32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_brev_b32 s4, 1
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_max_i32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, 1
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_max_i32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_brev_b32 s4, 1
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_max_i32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, 1
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_max_i32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_brev_b32 s4, 1
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_max_i32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, 1
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_max_i32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s2, 1
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_max_i32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, 1
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_max_i32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s4, 1
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_max_i32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, 1
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_max_i32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s2, 1
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_max_i32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s6, 1
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_max_i32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_brev_b32 s6, 1
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_max_i32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s6, 1
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_max_i32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_brev_b32 s6, 1
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_max_i32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s6, 1
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_max_i32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_brev_b32 s6, 1
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_max_i32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s1, 1
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_max_i32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s0, 1
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_max_i32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s6, 1
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_max_i32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s6, 1
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_max_i32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_max_i32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s0, 1
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_max_i32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s5, 1
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_brev_b32 s5, 1
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s5, 1
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_brev_b32 s5, 1
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, 1
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_brev_b32 s5, 1
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, 1
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s5, 1
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, 1
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s1, 1
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, 1
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s1, 1
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1880,3 +1534,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index 4e0c9ec111cbe..42c8e996fa720 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -124,187 +124,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s4, -2
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_min_i32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_brev_b32 s4, -2
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_min_i32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s4, -2
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_min_i32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_brev_b32 s4, -2
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_min_i32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s4, -2
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_min_i32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_brev_b32 s4, -2
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_min_i32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s2, -2
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_min_i32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s2, -2
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_min_i32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s4, -2
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_min_i32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s4, -2
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_min_i32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s2, -2
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_min_i32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s2, -2
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_min_i32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s6, -2
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_min_i32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_brev_b32 s6, -2
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_min_i32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s6, -2
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_min_i32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_brev_b32 s6, -2
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_min_i32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s6, -2
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_min_i32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_brev_b32 s6, -2
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_min_i32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s1, -2
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_min_i32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s0, -2
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_min_i32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s6, -2
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_min_i32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s6, -2
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_min_i32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_min_i32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s0, -2
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_min_i32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1272,7 +926,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_brev_b32 s5, -2
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1283,7 +937,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1297,7 +951,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_brev_b32 s5, -2
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1308,7 +962,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1322,7 +976,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_brev_b32 s5, -2
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1333,7 +987,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1347,7 +1001,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_brev_b32 s5, -2
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1358,7 +1012,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1372,7 +1026,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_brev_b32 s5, -2
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1383,7 +1037,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1396,7 +1050,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_brev_b32 s5, -2
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1407,7 +1061,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1420,7 +1074,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_brev_b32 s5, -2
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1431,7 +1085,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_brev_b32 s5, -2
; GFX1032GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1455,7 +1109,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1468,7 +1122,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_brev_b32 s1, -2
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1480,7 +1134,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1493,7 +1147,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_brev_b32 s1, -2
; GFX1164GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1505,7 +1159,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1518,7 +1172,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_brev_b32 s1, -2
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1529,7 +1183,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1541,7 +1195,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_brev_b32 s1, -2
; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1552,7 +1206,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1593,19 +1247,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1642,19 +1296,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1691,19 +1345,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1740,19 +1394,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1793,19 +1447,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1844,19 +1498,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1880,3 +1534,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 1849eaecbe143..92f9fca285bb6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -124,187 +124,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -318,13 +150,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -339,13 +171,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -357,13 +189,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -377,13 +209,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -395,13 +227,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -415,13 +247,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -433,13 +265,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -454,14 +286,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -474,14 +306,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_or_b32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -495,14 +327,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -515,14 +347,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_or_b32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -550,20 +382,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -578,26 +410,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -621,20 +453,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -648,26 +480,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -690,20 +522,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -717,26 +549,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -759,20 +591,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -786,26 +618,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_or_b32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -830,21 +662,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -860,27 +692,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_or_b32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -905,21 +737,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_or_b32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -935,27 +767,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_or_b32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1087,184 +919,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1]
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1272,14 +926,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1293,14 +947,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1314,14 +968,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1335,14 +989,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1356,14 +1010,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1376,14 +1030,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1396,14 +1050,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1416,14 +1070,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1436,7 +1090,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -1444,7 +1098,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1457,7 +1111,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -1465,7 +1119,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1478,7 +1132,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1486,7 +1140,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1498,7 +1152,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1506,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1548,19 +1202,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1597,19 +1251,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1646,19 +1300,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1695,19 +1349,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1748,19 +1402,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1799,19 +1453,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1835,3 +1489,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index 9a0917133fc59..552dd6f859c7a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -200,1834 +200,995 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, 0x7b
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2
+; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX8GISEL-NEXT: s_mov_b32 s4, 0
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, 0x7b
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1)
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1)
store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s0
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s4, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6
+; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT: ; %bb.5:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT: ; %bb.1: ; %else
+; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6
+; GFX8GISEL-NEXT: s_mul_i32 s6, s3, s2
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT: ; %bb.3: ; %if
+; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, 0
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
+; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value:
+; GFX9DAGISEL-LABEL: divergent_cfg:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s0
+; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6
; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
+; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT: ; %bb.5:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: poison_value:
+; GFX9GISEL-LABEL: divergent_cfg:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT: ; %bb.1: ; %else
+; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6
+; GFX9GISEL-NEXT: s_mul_i32 s6, s3, s2
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT: ; %bb.3: ; %if
+; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, 0
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
+; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s0
+; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6
; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT: ; %bb.5:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: poison_value:
+; GFX1064GISEL-LABEL: divergent_cfg:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT: ; %bb.1: ; %else
+; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6
+; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT: ; %bb.3: ; %if
+; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: poison_value:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1
+; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT: ; %bb.5:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: poison_value:
+; GFX1032GISEL-LABEL: divergent_cfg:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT: ; %bb.1: ; %else
+; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0
+; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT: ; %bb.3: ; %if
+; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: poison_value:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6
; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT: ; %bb.5:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value:
+; GFX1164GISEL-LABEL: divergent_cfg:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT: ; %bb.1: ; %else
+; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6
+; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT: ; %bb.3: ; %if
+; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, s2
+; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1
+; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_sub_i32 s3, 0, s0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT: ; %bb.2:
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT: ; %bb.2:
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT: ; %bb.2:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT: ; %bb.2:
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT: ; %bb.2:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT: ; %bb.2:
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT: s_sub_i32 s2, s2, s5
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT: ; %bb.2:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT: s_sub_i32 s2, s2, s5
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT: ; %bb.2:
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT: ; %bb.2:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT: s_sub_i32 s4, s4, s6
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT: ; %bb.2:
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT: s_sub_i32 s2, s2, s5
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT: s_sub_i32 s2, s2, s5
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT: ; %bb.2:
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %id.x, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT: ; %bb.5:
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT: ; %bb.1: ; %else
-; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX8GISEL-NEXT: s_mul_i32 s6, s3, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT: ; %bb.3: ; %if
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
-; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT: ; %bb.5:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT: ; %bb.1: ; %else
-; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX9GISEL-NEXT: s_mul_i32 s6, s3, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT: ; %bb.3: ; %if
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
-; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT: ; %bb.5:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT: ; %bb.1: ; %else
-; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX1064GISEL-NEXT: s_mul_i32 s6, s3, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT: ; %bb.3: ; %if
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_sub_i32 s1, 0, s1
-; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT: s_sub_i32 s1, s1, s6
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT: ; %bb.5:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT: ; %bb.1: ; %else
-; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_sub_i32 s0, 0, s0
-; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT: ; %bb.3: ; %if
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT: s_sub_i32 s0, s0, s6
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT: ; %bb.5:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT: ; %bb.1: ; %else
-; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_sub_i32 s3, 0, s6
-; GFX1164GISEL-NEXT: s_mul_i32 s6, s3, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT: ; %bb.3: ; %if
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT: s_sub_i32 s6, s6, s8
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_sub_i32 s1, 0, s1
-; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT: ; %bb.5:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT: ; %bb.1: ; %else
-; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0
-; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT: ; %bb.3: ; %if
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- %d_cmp = icmp ult i32 %tid, 16
- br i1 %d_cmp, label %if, label %else
-
-if:
- %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1)
- br label %endif
-
-else:
- %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1)
- br label %endif
-
-endif:
- %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
- store i32 %combine, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0
-; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3
-; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4
-; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4
-; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3
-; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4
-; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4
-; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3
-; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4
-; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4
-; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5
-; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5
-; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3
-; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4
-; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5
-; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31
-; GFX8DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31
-; GFX8GISEL-NEXT: s_mul_i32 s4, s2, 0x7b
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31
-; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31
-; GFX9GISEL-NEXT: s_mul_i32 s4, s2, 0x7b
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_hi_u32 s5, 0x7b, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1064GISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1032GISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1164GISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT: s_sub_i32 s1, s1, s6
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT: ; %bb.5:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value_i64:
+; GFX1132GISEL-LABEL: divergent_cfg:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT: ; %bb.1: ; %else
+; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
+; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, 0
-; GFX1132GISEL-NEXT: s_mulk_i32 s3, 0x7b
-; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: s_sub_i32 s0, 0, s0
+; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT: ; %bb.3: ; %if
+; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT: s_sub_i32 s0, s0, s6
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %d_cmp = icmp ult i32 %tid, 16
+ br i1 %d_cmp, label %if, label %else
+
+if:
+ %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %tid, i32 1)
+ br label %endif
+
+else:
+ %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %in, i32 1)
+ br label %endif
+
+endif:
+ %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+ store i32 %combine, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT: s_ashr_i32 s2, s3, 31
+; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8DAGISEL-NEXT: s_sub_i32 s4, 0, s4
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s4, s0, s2
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s3, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: s_ashr_i32 s0, s4, 31
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT: s_mul_i32 s1, s2, s0
+; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s4
+; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3
+; GFX8DAGISEL-NEXT: s_add_u32 s1, s2, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT: s_ashr_i32 s2, s3, 31
+; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8GISEL-NEXT: s_sub_i32 s5, 0, s4
+; GFX8GISEL-NEXT: s_ashr_i32 s4, s5, 31
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s4, s0, s2
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT: s_mul_hi_u32 s5, s0, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s4
+; GFX8GISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX8GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3
+; GFX8GISEL-NEXT: s_add_u32 s5, s2, s6
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX9DAGISEL-NEXT: s_ashr_i32 s2, s3, 31
+; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9DAGISEL-NEXT: s_sub_i32 s5, 0, s4
+; GFX9DAGISEL-NEXT: s_ashr_i32 s4, s5, 31
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s4, s0, s2
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s5, s0, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9DAGISEL-NEXT: s_mul_i32 s6, s2, s4
+; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX9DAGISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3
+; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s6
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_sub_i32 s3, 0, s2
-; GFX9GISEL-NEXT: s_ashr_i32 s2, s3, 31
+; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9GISEL-NEXT: s_sub_i32 s5, 0, s4
+; GFX9GISEL-NEXT: s_ashr_i32 s4, s5, 31
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s4, s0, s2
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT: s_mul_hi_u32 s5, s0, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT: s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT: s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s4
+; GFX9GISEL-NEXT: s_mul_i32 s4, s2, s5
+; GFX9GISEL-NEXT: s_mul_hi_u32 s2, s2, s5
+; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s5
+; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3
+; GFX9GISEL-NEXT: s_add_u32 s5, s2, s6
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
+; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064DAGISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1064DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1064DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1064DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1064DAGISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064GISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1064GISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1064GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1064GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1064GISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1064GISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1032DAGISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1032DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1032DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1032DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1032DAGISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1032GISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1032GISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1032GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1032GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1032GISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1032GISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1164DAGISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1164DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1164DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1164DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1164DAGISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1164DAGISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1164GISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1164GISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1164GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1164GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1164GISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1164GISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_sub_i32 s2, 0, s2
+; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1132DAGISEL-NEXT: s_sub_i32 s4, 0, s4
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1132DAGISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1132DAGISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1132DAGISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1132DAGISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1132DAGISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT: s_ashr_i32 s3, s2, 31
+; GFX1132GISEL-NEXT: s_sub_i32 s4, 0, s4
+; GFX1132GISEL-NEXT: s_ashr_i32 s5, s4, 31
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_hi_u32 s4, s0, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s5, s1, s2
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s0, s3
-; GFX1132GISEL-NEXT: s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX1132GISEL-NEXT: s_mul_hi_u32 s6, s2, s4
+; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
+; GFX1132GISEL-NEXT: s_mul_i32 s5, s2, s5
+; GFX1132GISEL-NEXT: s_add_u32 s3, s6, s3
+; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT: s_add_u32 s3, s3, s5
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
+ %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %in, i32 1)
store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -2039,7 +1200,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX8DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2047,7 +1208,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX8DAGISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2061,7 +1222,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX8GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2069,7 +1230,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX8GISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2083,7 +1244,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX9DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2091,7 +1252,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX9DAGISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2105,7 +1266,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX9GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2113,7 +1274,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s8
; GFX9GISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2127,7 +1288,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX1064DAGISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2135,7 +1296,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_sub_u32 s4, s4, s9
; GFX1064DAGISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2148,7 +1309,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s8, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v2, s8
; GFX1064GISEL-NEXT: v_readlane_b32 s10, v3, s8
@@ -2156,7 +1317,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_sub_u32 s4, s4, s9
; GFX1064GISEL-NEXT: s_subb_u32 s5, s5, s10
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2169,7 +1330,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
@@ -2177,7 +1338,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_sub_u32 s4, s4, s8
; GFX1032DAGISEL-NEXT: s_subb_u32 s5, s5, s9
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2190,7 +1351,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
@@ -2198,7 +1359,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_sub_u32 s4, s4, s8
; GFX1032GISEL-NEXT: s_subb_u32 s5, s5, s9
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2211,7 +1372,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s5, v2, s4
@@ -2220,7 +1381,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_sub_u32 s0, s0, s5
; GFX1164DAGISEL-NEXT: s_subb_u32 s1, s1, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2233,7 +1394,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s4, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s5, v2, s4
@@ -2242,7 +1403,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_sub_u32 s0, s0, s5
; GFX1164GISEL-NEXT: s_subb_u32 s1, s1, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2255,7 +1416,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2264,7 +1425,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_sub_u32 s0, s0, s4
; GFX1132DAGISEL-NEXT: s_subb_u32 s1, s1, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2276,7 +1437,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2285,7 +1446,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_sub_u32 s0, s0, s4
; GFX1132GISEL-NEXT: s_subb_u32 s1, s1, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2305,7 +1466,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2318,13 +1479,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX8DAGISEL-NEXT: s_add_u32 s2, s2, s3
; GFX8DAGISEL-NEXT: s_add_u32 s7, s2, s10
-; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2338,7 +1499,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: s_add_u32 s7, s4, s8
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s7
-; GFX8DAGISEL-NEXT: .LBB9_4: ; %endif
+; GFX8DAGISEL-NEXT: .LBB5_4: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -2352,7 +1513,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2365,10 +1526,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX8GISEL-NEXT: s_add_u32 s2, s2, s3
; GFX8GISEL-NEXT: s_add_u32 s7, s2, s10
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2382,7 +1543,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_mul_i32 s5, s5, s7
; GFX8GISEL-NEXT: s_add_u32 s4, s4, s5
; GFX8GISEL-NEXT: s_add_u32 s7, s4, s8
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -2399,7 +1560,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -2412,13 +1573,13 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s5
; GFX9DAGISEL-NEXT: s_add_u32 s2, s2, s3
; GFX9DAGISEL-NEXT: s_add_u32 s5, s2, s10
-; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[2:3]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -2432,7 +1593,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: s_add_u32 s5, s5, s8
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s5
-; GFX9DAGISEL-NEXT: .LBB9_4: ; %endif
+; GFX9DAGISEL-NEXT: .LBB5_4: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -2445,7 +1606,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2458,10 +1619,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s7
; GFX9GISEL-NEXT: s_add_u32 s2, s2, s3
; GFX9GISEL-NEXT: s_add_u32 s7, s2, s10
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2475,7 +1636,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_mul_i32 s5, s8, s5
; GFX9GISEL-NEXT: s_add_u32 s4, s7, s4
; GFX9GISEL-NEXT: s_add_u32 s7, s4, s5
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2492,7 +1653,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_mov_b64 s[8:9], exec
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s8, s[8:9]
@@ -2505,7 +1666,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: s_add_u32 s3, s10, s3
; GFX1064DAGISEL-NEXT: s_mul_i32 s8, s2, s8
; GFX1064DAGISEL-NEXT: s_add_u32 s9, s3, s9
-; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[4:5]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s8
@@ -2537,7 +1698,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2550,10 +1711,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_add_u32 s3, s10, s3
; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1064GISEL-NEXT: s_add_u32 s7, s3, s7
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2567,7 +1728,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_add_u32 s7, s8, s7
; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4
; GFX1064GISEL-NEXT: s_add_u32 s7, s7, s5
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2584,7 +1745,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
@@ -2597,7 +1758,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: s_add_u32 s3, s9, s3
; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s4
; GFX1032DAGISEL-NEXT: s_add_u32 s5, s3, s5
-; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2629,7 +1790,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6
@@ -2642,10 +1803,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_add_u32 s3, s9, s3
; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1032GISEL-NEXT: s_add_u32 s7, s3, s7
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2659,7 +1820,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_add_u32 s5, s5, s7
; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3
; GFX1032GISEL-NEXT: s_add_u32 s7, s5, s4
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2678,7 +1839,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_mov_b64 s[8:9], exec
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2693,7 +1854,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_add_u32 s3, s10, s3
; GFX1164DAGISEL-NEXT: s_mul_i32 s8, s2, s8
; GFX1164DAGISEL-NEXT: s_add_u32 s9, s3, s9
-; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[6:7]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s8
@@ -2729,7 +1890,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2744,10 +1905,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_add_u32 s3, s10, s3
; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1164GISEL-NEXT: s_add_u32 s7, s3, s7
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2763,7 +1924,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_add_u32 s5, s8, s5
; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s6
; GFX1164GISEL-NEXT: s_add_u32 s7, s5, s7
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2782,7 +1943,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2797,7 +1958,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_add_u32 s3, s9, s3
; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1132DAGISEL-NEXT: s_add_u32 s7, s3, s7
-; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -2832,7 +1993,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2847,10 +2008,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_add_u32 s3, s9, s3
; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s6
; GFX1132GISEL-NEXT: s_add_u32 s7, s3, s7
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2866,7 +2027,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_add_u32 s5, s7, s5
; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3
; GFX1132GISEL-NEXT: s_add_u32 s7, s5, s8
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index a4a5b01a873b7..dc2c2dad16f55 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -125,153 +125,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_max_u32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_max_u32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_max_u32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_max_u32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_max_u32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_max_u32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_max_u32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1054,146 +920,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1330,19 +1056,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1379,19 +1105,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1428,19 +1154,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1477,19 +1203,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1530,19 +1256,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1581,19 +1307,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1617,3 +1343,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 29a78855d6629..d170fbc957e28 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -125,153 +125,19 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: poison_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_min_u32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -285,13 +151,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_min_u32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -306,13 +172,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_min_u32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -324,13 +190,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_min_u32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -344,13 +210,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_min_u32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -362,13 +228,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_min_u32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -382,13 +248,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_min_u32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -400,13 +266,13 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_min_u32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -421,14 +287,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_min_u32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -441,14 +307,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, -1
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_min_u32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
@@ -462,14 +328,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_min_u32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -482,14 +348,14 @@ define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, -1
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_min_u32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -517,20 +383,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8DAGISEL-NEXT: s_min_u32 s6, s6, s8
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX8DAGISEL-NEXT: ; %bb.5:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -545,26 +411,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b32 s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s6, -1
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX8GISEL-NEXT: s_min_u32 s6, s6, s8
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
@@ -588,20 +454,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9DAGISEL-NEXT: s_min_u32 s6, s6, s8
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX9DAGISEL-NEXT: ; %bb.5:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -615,26 +481,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b32 s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s6, -1
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX9GISEL-NEXT: s_min_u32 s6, s6, s8
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -657,20 +523,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064DAGISEL-NEXT: s_min_u32 s6, s6, s8
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1064DAGISEL-NEXT: ; %bb.5:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -684,26 +550,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1064GISEL-NEXT: s_min_u32 s6, s6, s8
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -726,20 +592,20 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032DAGISEL-NEXT: s_min_u32 s1, s1, s6
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1032DAGISEL-NEXT: ; %bb.5:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -753,26 +619,26 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1032GISEL-NEXT: s_min_u32 s0, s0, s6
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
@@ -797,21 +663,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164DAGISEL-NEXT: s_min_u32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1164DAGISEL-NEXT: ; %bb.5:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -827,27 +693,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b32 s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s6, -1
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
; GFX1164GISEL-NEXT: s_min_u32 s6, s6, s8
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -872,21 +738,21 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_min_u32 s1, s1, s6
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
; GFX1132DAGISEL-NEXT: ; %bb.5:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
@@ -902,27 +768,27 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b32 s0, s0
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_min_u32 s0, s0, s6
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
@@ -1054,146 +920,6 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: const_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: const_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX10GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: const_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: const_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v1, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: const_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: poison_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: poison_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: poison_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX10DAGISEL-LABEL: poison_value_i64:
-; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_endpgm
-;
-; GFX10GISEL-LABEL: poison_value_i64:
-; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_endpgm
-;
-; GFX11DAGISEL-LABEL: poison_value_i64:
-; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_endpgm
-;
-; GFX11GISEL-LABEL: poison_value_i64:
-; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-LABEL: divergent_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
@@ -1201,7 +927,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1212,7 +938,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1226,7 +952,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, -1
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX8GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1237,7 +963,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX8GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1251,7 +977,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1262,7 +988,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1276,7 +1002,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, -1
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX9GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1287,7 +1013,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX9GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1301,7 +1027,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1312,7 +1038,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1325,7 +1051,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, -1
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s12, s[6:7]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1336,7 +1062,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s12
; GFX1064GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1349,7 +1075,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, -1
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1360,7 +1086,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1373,7 +1099,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, -1
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v4, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v5, s5
@@ -1384,7 +1110,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1397,7 +1123,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, -1
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1409,7 +1135,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1422,7 +1148,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, -1
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s8, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v4, s0
; GFX1164GISEL-NEXT: v_mov_b32_e32 v5, s1
@@ -1434,7 +1160,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s8
; GFX1164GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -1447,7 +1173,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, -1
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1458,7 +1184,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1470,7 +1196,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, -1
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -1481,7 +1207,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -1522,19 +1248,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -1571,19 +1297,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1620,19 +1346,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1669,19 +1395,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mov_b64 s[6:7], s[6:7]
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1722,19 +1448,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -1773,19 +1499,19 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
@@ -1809,3 +1535,6 @@ endif:
store i64 %combine, ptr addrspace(1) %out
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11DAGISEL: {{.*}}
+; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index 9c523b2404121..c64ad81fdcf6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -200,1494 +200,773 @@ entry:
ret void
}
-define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value:
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
+; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
+; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX8DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: const_value:
+; GFX8GISEL-LABEL: divergent_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b
+; GFX8GISEL-NEXT: s_mov_b32 s4, 0
+; GFX8GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: const_value:
+; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX9DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9DAGISEL-NEXT: ; %bb.2:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: const_value:
+; GFX9GISEL-LABEL: divergent_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX9GISEL-NEXT: ; %bb.2:
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
+; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064DAGISEL-NEXT: ; %bb.2:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: const_value:
+; GFX1064GISEL-LABEL: divergent_value:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1064GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1064GISEL-NEXT: ; %bb.2:
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: const_value:
+; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032DAGISEL-NEXT: ; %bb.2:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: const_value:
+; GFX1032GISEL-LABEL: divergent_value:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
+; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1032GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
+; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: const_value:
+; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164DAGISEL-NEXT: ; %bb.2:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: const_value:
+; GFX1164GISEL-LABEL: divergent_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
+; GFX1164GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
+; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1164GISEL-NEXT: ; %bb.2:
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: const_value:
+; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132DAGISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132DAGISEL-NEXT: ; %bb.2:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value:
+; GFX1132GISEL-LABEL: divergent_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
+; GFX1132GISEL-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
+; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
+; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB1_1
+; GFX1132GISEL-NEXT: ; %bb.2:
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1)
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1)
store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: poison_value:
+define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
+; GFX8DAGISEL-LABEL: divergent_cfg:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
+; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX8DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX8DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8DAGISEL-NEXT: ; %bb.5:
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX8DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value:
+; GFX8GISEL-LABEL: divergent_cfg:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX8GISEL-NEXT: ; %bb.1: ; %else
+; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX8GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX8GISEL-NEXT: ; %bb.3: ; %if
+; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX8GISEL-NEXT: s_mov_b32 s6, 0
+; GFX8GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX8GISEL-NEXT: .LBB2_5: ; %endif
+; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value:
+; GFX9DAGISEL-LABEL: divergent_cfg:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2
+; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX9DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX9DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9DAGISEL-NEXT: ; %bb.5:
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX9DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: poison_value:
+; GFX9GISEL-LABEL: divergent_cfg:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX9GISEL-NEXT: ; %bb.1: ; %else
+; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
+; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX9GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX9GISEL-NEXT: ; %bb.3: ; %if
+; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX9GISEL-NEXT: s_mov_b32 s6, 0
+; GFX9GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX9GISEL-NEXT: .LBB2_5: ; %endif
+; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value:
+; GFX1064DAGISEL-LABEL: divergent_cfg:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1064DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064DAGISEL-NEXT: ; %bb.5:
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1064DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: poison_value:
+; GFX1064GISEL-LABEL: divergent_cfg:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
+; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1064GISEL-NEXT: ; %bb.1: ; %else
+; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX1064GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1064GISEL-NEXT: ; %bb.3: ; %if
+; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1064GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
+; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1064GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: poison_value:
+; GFX1032DAGISEL-LABEL: divergent_cfg:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1032DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1032DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6
+; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032DAGISEL-NEXT: ; %bb.5:
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1032DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: poison_value:
+; GFX1032GISEL-LABEL: divergent_cfg:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
+; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1032GISEL-NEXT: ; %bb.1: ; %else
+; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1032GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1032GISEL-NEXT: ; %bb.3: ; %if
+; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1032GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
+; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6
+; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1032GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: poison_value:
+; GFX1164DAGISEL-LABEL: divergent_cfg:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
+; GFX1164DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: divergent_value:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8DAGISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8DAGISEL-NEXT: ; %bb.2:
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_value:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s4, 0
-; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX8GISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX8GISEL-NEXT: ; %bb.2:
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_value:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9DAGISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9DAGISEL-NEXT: ; %bb.2:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_value:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s4, 0
-; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX9GISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX9GISEL-NEXT: ; %bb.2:
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064DAGISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064DAGISEL-NEXT: ; %bb.2:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1064GISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1064GISEL-NEXT: ; %bb.2:
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032DAGISEL-NEXT: s_xor_b32 s2, s2, s5
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032DAGISEL-NEXT: ; %bb.2:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
-; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1032GISEL-NEXT: s_xor_b32 s2, s2, s5
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1032GISEL-NEXT: ; %bb.2:
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164DAGISEL-NEXT: s_xor_b32 s4, s4, s6
+; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
+; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164DAGISEL-NEXT: ; %bb.2:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
-; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
-; GFX1164GISEL-NEXT: s_xor_b32 s4, s4, s6
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1164GISEL-NEXT: ; %bb.2:
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132DAGISEL-NEXT: s_xor_b32 s2, s2, s5
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132DAGISEL-NEXT: ; %bb.2:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
-; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
-; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
-; GFX1132GISEL-NEXT: s_xor_b32 s2, s2, s5
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
-; GFX1132GISEL-NEXT: ; %bb.2:
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %id.x, i32 1)
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
-; GFX8DAGISEL-LABEL: divergent_cfg:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX8DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX8DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8DAGISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8DAGISEL-NEXT: ; %bb.5:
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: divergent_cfg:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX8GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX8GISEL-NEXT: ; %bb.1: ; %else
-; GFX8GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX8GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX8GISEL-NEXT: ; %bb.3: ; %if
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_mov_b32 s6, 0
-; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX8GISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX8GISEL-NEXT: .LBB4_5: ; %endif
-; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: divergent_cfg:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX9DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX9DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9DAGISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9DAGISEL-NEXT: ; %bb.5:
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: divergent_cfg:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX9GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX9GISEL-NEXT: ; %bb.1: ; %else
-; GFX9GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX9GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX9GISEL-NEXT: ; %bb.3: ; %if
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_mov_b32 s6, 0
-; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX9GISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX9GISEL-NEXT: .LBB4_5: ; %endif
-; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: divergent_cfg:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0
-; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1064DAGISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064DAGISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064DAGISEL-NEXT: ; %bb.5:
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: divergent_cfg:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0
-; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1064GISEL-NEXT: ; %bb.1: ; %else
-; GFX1064GISEL-NEXT: s_load_dword s6, s[4:5], 0x2c
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1064GISEL-NEXT: ; %bb.3: ; %if
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3]
-; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1064GISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1064GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: divergent_cfg:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0
-; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032DAGISEL-NEXT: s_xor_b32 s1, s1, s6
-; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032DAGISEL-NEXT: ; %bb.5:
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: divergent_cfg:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0
-; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo
-; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1032GISEL-NEXT: ; %bb.1: ; %else
-; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1032GISEL-NEXT: ; %bb.3: ; %if
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2
-; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1032GISEL-NEXT: s_xor_b32 s0, s0, s6
-; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1032GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: divergent_cfg:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1164DAGISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s6, s2
-; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164DAGISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164DAGISEL-NEXT: ; %bb.5:
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
-; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: divergent_cfg:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
-; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1164GISEL-NEXT: ; %bb.1: ; %else
-; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2
-; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1164GISEL-NEXT: ; %bb.3: ; %if
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
-; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
-; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
-; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8
-; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1164GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: divergent_cfg:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
-; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
-; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
-; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6
-; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
-; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6
-; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132DAGISEL-NEXT: ; %bb.5:
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif
-; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: divergent_cfg:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
-; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2
-; GFX1132GISEL-NEXT: ; %bb.1: ; %else
-; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
-; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow
-; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5
-; GFX1132GISEL-NEXT: ; %bb.3: ; %if
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
-; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1
-; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
-; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
-; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6
-; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4
-; GFX1132GISEL-NEXT: .LBB4_5: ; %endif
-; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
-; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %tid = call i32 @llvm.amdgcn.workitem.id.x()
- %d_cmp = icmp ult i32 %tid, 16
- br i1 %d_cmp, label %if, label %else
-
-if:
- %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1)
- br label %endif
-
-else:
- %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1)
- br label %endif
-
-endif:
- %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
- store i32 %combine, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: uniform_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: uniform_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: uniform_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: uniform_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: uniform_value_i64:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: uniform_value_i64:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: uniform_value_i64:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: uniform_value_i64:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: uniform_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
-;
-; GFX1164GISEL-LABEL: uniform_value_i64:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
-;
-; GFX1132DAGISEL-LABEL: uniform_value_i64:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
-;
-; GFX1132GISEL-LABEL: uniform_value_i64:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
-entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1)
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
-; GFX8DAGISEL-LABEL: const_value_i64:
-; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8DAGISEL-NEXT: s_endpgm
-;
-; GFX8GISEL-LABEL: const_value_i64:
-; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8GISEL-NEXT: s_endpgm
-;
-; GFX9DAGISEL-LABEL: const_value_i64:
-; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9DAGISEL-NEXT: s_endpgm
-;
-; GFX9GISEL-LABEL: const_value_i64:
-; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9GISEL-NEXT: s_endpgm
-;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: const_value_i64:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164DAGISEL-NEXT: ; %bb.5:
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6
+; GFX1164DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: const_value_i64:
+; GFX1164GISEL-LABEL: divergent_cfg:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec
+; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6
+; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1164GISEL-NEXT: ; %bb.1: ; %else
+; GFX1164GISEL-NEXT: s_load_b32 s6, s[4:5], 0x2c
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_and_b32 s2, s2, 1
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164GISEL-NEXT: s_mul_i32 s6, s6, s2
+; GFX1164GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1]
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1164GISEL-NEXT: ; %bb.3: ; %if
+; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_mov_b32 s6, 0
+; GFX1164GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3]
+; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7
+; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7
+; GFX1164GISEL-NEXT: s_xor_b32 s6, s6, s8
+; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1164GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1]
+; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: const_value_i64:
+; GFX1132DAGISEL-LABEL: divergent_cfg:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo
+; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1
+; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
+; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
+; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1132DAGISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132DAGISEL-NEXT: s_mul_i32 s1, s1, s2
+; GFX1132DAGISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB2_6
+; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if
+; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0
+; GFX1132DAGISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132DAGISEL-NEXT: s_xor_b32 s1, s1, s6
+; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132DAGISEL-NEXT: ; %bb.5:
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
+; GFX1132DAGISEL-NEXT: .LBB2_6: ; %endif
+; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: const_value_i64:
+; GFX1132GISEL-LABEL: divergent_cfg:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo
+; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0
+; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
+; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_2
+; GFX1132GISEL-NEXT: ; %bb.1: ; %else
+; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, 0
+; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT: s_and_b32 s2, s2, 1
+; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX1132GISEL-NEXT: s_mul_i32 s0, s0, s2
+; GFX1132GISEL-NEXT: .LBB2_2: ; %Flow
+; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB2_5
+; GFX1132GISEL-NEXT: ; %bb.3: ; %if
+; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
+; GFX1132GISEL-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3
+; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
+; GFX1132GISEL-NEXT: s_xor_b32 s0, s0, s6
+; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB2_4
+; GFX1132GISEL-NEXT: .LBB2_5: ; %endif
+; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1)
- store i64 %result, ptr addrspace(1) %out
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %d_cmp = icmp ult i32 %tid, 16
+ br i1 %d_cmp, label %if, label %else
+
+if:
+ %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %tid, i32 1)
+ br label %endif
+
+else:
+ %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %in, i32 1)
+ br label %endif
+
+endif:
+ %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
+ store i32 %combine, ptr addrspace(1) %out
ret void
}
-define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
-; GFX8DAGISEL-LABEL: poison_value_i64:
+define amdgpu_kernel void @uniform_value_i64(ptr addrspace(1) %out, i64 %in) {
+; GFX8DAGISEL-LABEL: uniform_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX8DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
+; GFX8DAGISEL-NEXT: s_mul_i32 s1, s3, s4
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
+; GFX8DAGISEL-NEXT: s_mul_i32 s0, s2, s4
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; GFX8DAGISEL-NEXT: s_endpgm
;
-; GFX8GISEL-LABEL: poison_value_i64:
+; GFX8GISEL-LABEL: uniform_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX8GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX8GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX8GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX8GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
@@ -1695,159 +974,159 @@ define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
;
-; GFX9DAGISEL-LABEL: poison_value_i64:
+; GFX9DAGISEL-LABEL: uniform_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX9DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX9DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
-; GFX9GISEL-LABEL: poison_value_i64:
+; GFX9GISEL-LABEL: uniform_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX9GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX9GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX9GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX9GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX9GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
+; GFX1064DAGISEL-LABEL: uniform_value_i64:
; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
-; GFX1064GISEL-LABEL: poison_value_i64:
+; GFX1064GISEL-LABEL: uniform_value_i64:
; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1064GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
+; GFX1064GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1064GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1064GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1064GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1064GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
+; GFX1032DAGISEL-LABEL: uniform_value_i64:
; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032DAGISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1032DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
-; GFX1032GISEL-LABEL: poison_value_i64:
+; GFX1032GISEL-LABEL: uniform_value_i64:
; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1032GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX1032GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1032GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1032GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1032GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1032GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1032GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
+; GFX1164DAGISEL-LABEL: uniform_value_i64:
; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1164DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value_i64:
+; GFX1164GISEL-LABEL: uniform_value_i64:
; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1164GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
+; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1164GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1164GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1164GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1164GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX1164GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
+; GFX1132DAGISEL-LABEL: uniform_value_i64:
; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132DAGISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
+; GFX1132DAGISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132DAGISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
; GFX1132DAGISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value_i64:
+; GFX1132GISEL-LABEL: uniform_value_i64:
; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
+; GFX1132GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1132GISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
+; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s4, s4
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_and_b32 s3, s2, 1
+; GFX1132GISEL-NEXT: s_and_b32 s4, s4, 1
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s3
-; GFX1132GISEL-NEXT: s_mul_i32 s3, s1, s3
+; GFX1132GISEL-NEXT: s_mul_i32 s2, s2, s4
+; GFX1132GISEL-NEXT: s_mul_i32 s3, s3, s4
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX1132GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
- %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
+ %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %in, i32 1)
store i64 %result, ptr addrspace(1) %out
ret void
}
@@ -1859,14 +1138,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX8DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1880,14 +1159,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_mov_b32 s5, s4
-; GFX8GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX8GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX8GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX8GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX8GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX8GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1901,14 +1180,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX9DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1922,14 +1201,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_mov_b32 s5, s4
-; GFX9GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX9GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX9GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX9GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX9GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX9GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1943,14 +1222,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064DAGISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1963,14 +1242,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1064GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1064GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7]
; GFX1064GISEL-NEXT: v_readlane_b32 s8, v2, s10
; GFX1064GISEL-NEXT: v_readlane_b32 s9, v3, s10
; GFX1064GISEL-NEXT: s_bitset0_b64 s[6:7], s10
; GFX1064GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
-; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -1983,14 +1262,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032DAGISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032DAGISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032DAGISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2003,14 +1282,14 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1032GISEL-NEXT: s_mov_b32 s4, 0
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s5, s4
-; GFX1032GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1032GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s7, s6
; GFX1032GISEL-NEXT: v_readlane_b32 s8, v2, s7
; GFX1032GISEL-NEXT: v_readlane_b32 s9, v3, s7
; GFX1032GISEL-NEXT: s_bitset0_b32 s6, s7
; GFX1032GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s6, 0
-; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX1032GISEL-NEXT: v_mov_b32_e32 v3, s5
@@ -2023,7 +1302,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2031,7 +1310,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2044,7 +1323,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_mov_b32 s0, 0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1164GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1164GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s6, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s4, v2, s6
@@ -2052,7 +1331,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s6
; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
-; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v3, s1
; GFX1164GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2065,7 +1344,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_mov_b32 s0, 0
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132DAGISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132DAGISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2073,7 +1352,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132DAGISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132DAGISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2085,7 +1364,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_mov_b32 s0, 0
; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s1, s0
-; GFX1132GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GFX1132GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s4, v2, s3
@@ -2093,7 +1372,7 @@ define void @divergent_value_i64(ptr addrspace(1) %out, i64 %id.x) {
; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX1132GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB8_1
+; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
; GFX1132GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off
@@ -2113,7 +1392,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2121,7 +1400,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX8DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX8DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -2149,7 +1428,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX8GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX8GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX8GISEL-NEXT: ; %bb.1: ; %else
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2157,10 +1436,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX8GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX8GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX8GISEL-NEXT: .LBB5_2: ; %Flow
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX8GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX8GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX8GISEL-NEXT: ; %bb.3: ; %if
; GFX8GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2169,7 +1448,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: s_mul_i32 s6, s4, s7
; GFX8GISEL-NEXT: s_mul_i32 s7, s5, s7
-; GFX8GISEL-NEXT: .LBB9_4: ; %endif
+; GFX8GISEL-NEXT: .LBB5_4: ; %endif
; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
@@ -2186,7 +1465,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -2194,7 +1473,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX9DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX9DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2221,7 +1500,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX9GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX9GISEL-NEXT: ; %bb.1: ; %else
; GFX9GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2229,10 +1508,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX9GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX9GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX9GISEL-NEXT: .LBB5_2: ; %Flow
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX9GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX9GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX9GISEL-NEXT: ; %bb.3: ; %if
; GFX9GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2241,7 +1520,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: s_mul_i32 s6, s6, s4
; GFX9GISEL-NEXT: s_mul_i32 s7, s7, s4
-; GFX9GISEL-NEXT: .LBB9_4: ; %endif
+; GFX9GISEL-NEXT: .LBB5_4: ; %endif
; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2258,7 +1537,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
@@ -2266,7 +1545,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX1064DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX1064DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2293,7 +1572,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX1064GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1064GISEL-NEXT: ; %bb.1: ; %else
; GFX1064GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
@@ -2301,10 +1580,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1064GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1064GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1064GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[2:3], s[8:9]
-; GFX1064GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1064GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1064GISEL-NEXT: ; %bb.3: ; %if
; GFX1064GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1064GISEL-NEXT: s_mov_b64 s[4:5], exec
@@ -2313,7 +1592,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: s_mul_i32 s6, s6, s4
; GFX1064GISEL-NEXT: s_mul_i32 s7, s7, s4
-; GFX1064GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1064GISEL-NEXT: .LBB5_4: ; %endif
; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2330,7 +1609,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_mov_b32 s4, exec_lo
; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s4, s4
@@ -2338,7 +1617,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_mul_i32 s4, s2, s5
; GFX1032DAGISEL-NEXT: s_mul_i32 s5, s3, s5
-; GFX1032DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -2365,7 +1644,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX1032GISEL-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GFX1032GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1032GISEL-NEXT: ; %bb.1: ; %else
; GFX1032GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s6, s6
@@ -2373,10 +1652,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1032GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1032GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1032GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s2, s8
-; GFX1032GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1032GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1032GISEL-NEXT: ; %bb.3: ; %if
; GFX1032GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2385,7 +1664,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: s_mul_i32 s6, s6, s3
; GFX1032GISEL-NEXT: s_mul_i32 s7, s7, s3
-; GFX1032GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1032GISEL-NEXT: .LBB5_4: ; %endif
; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2404,7 +1683,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2413,7 +1692,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1164DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1164DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[2:3], s[8:9]
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s6
@@ -2444,7 +1723,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1164GISEL-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1164GISEL-NEXT: ; %bb.1: ; %else
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2453,10 +1732,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1164GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1164GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1164GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[2:3], s[8:9]
-; GFX1164GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1164GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1164GISEL-NEXT: ; %bb.3: ; %if
; GFX1164GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1164GISEL-NEXT: s_mov_b64 s[6:7], exec
@@ -2466,7 +1745,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: s_mul_i32 s6, s4, s7
; GFX1164GISEL-NEXT: s_mul_i32 s7, s5, s7
-; GFX1164GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1164GISEL-NEXT: .LBB5_4: ; %endif
; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, s7
@@ -2485,7 +1764,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2494,7 +1773,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1132DAGISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1132DAGISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132DAGISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s2, s8
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
@@ -2524,7 +1803,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0
; GFX1132GISEL-NEXT: s_xor_b32 s8, exec_lo, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_2
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_2
; GFX1132GISEL-NEXT: ; %bb.1: ; %else
; GFX1132GISEL-NEXT: s_mov_b32 s6, exec_lo
; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
@@ -2533,10 +1812,10 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mul_i32 s6, s2, s7
; GFX1132GISEL-NEXT: s_mul_i32 s7, s3, s7
-; GFX1132GISEL-NEXT: .LBB9_2: ; %Flow
+; GFX1132GISEL-NEXT: .LBB5_2: ; %Flow
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s2, s8
-; GFX1132GISEL-NEXT: s_cbranch_execz .LBB9_4
+; GFX1132GISEL-NEXT: s_cbranch_execz .LBB5_4
; GFX1132GISEL-NEXT: ; %bb.3: ; %if
; GFX1132GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
@@ -2546,7 +1825,7 @@ define amdgpu_kernel void @divergent_cfg_i64(ptr addrspace(1) %out, i64 %in, i64
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: s_mul_i32 s6, s4, s3
; GFX1132GISEL-NEXT: s_mul_i32 s7, s5, s3
-; GFX1132GISEL-NEXT: .LBB9_4: ; %endif
+; GFX1132GISEL-NEXT: .LBB5_4: ; %endif
; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1132GISEL-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
index 863598578ea77..a27a121a3af61 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
@@ -2,13 +2,14 @@
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umin
; --------------------------------------------------------------------
declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umin.i64(i64, i32 immarg)
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_poison(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
@@ -19,8 +20,8 @@ entry:
ret void
}
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
@@ -31,14 +32,94 @@ entry:
ret void
}
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.min
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.min.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.min.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umax
; --------------------------------------------------------------------
declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umax.i64(i64, i32 immarg)
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_poison(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
@@ -49,8 +130,8 @@ entry:
ret void
}
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
@@ -61,6 +142,30 @@ entry:
ret void
}
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
@gv = constant i32 0
define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) {
; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv(
@@ -74,3 +179,333 @@ entry:
store i32 %result, ptr addrspace(1) %out
ret void
}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.max
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.max.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.max.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.add
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.add.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.add.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.sub
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.sub.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.sub.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.and
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.and.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.and.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.or
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.or.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.or.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.xor
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.xor.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.xor.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1)
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-branch-commits
mailing list