[llvm] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: accept memory instructions in the "then" block (PR #109995)
Juan Manuel Martinez Caamaño via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 07:58:34 PDT 2024
https://github.com/jmmartinez updated https://github.com/llvm/llvm-project/pull/109995
>From 6656481eb291d663d6c85d0f7e41f24200e55409 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Wed, 25 Sep 2024 16:01:40 +0200
Subject: [PATCH] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: accept
memory instructions in the "then" block
---
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 5 +-
.../atomic_optimizations_mul_one.ll | 36 ++++------
...vergence-divergent-i1-used-outside-loop.ll | 12 ++--
.../GlobalISel/divergence-structurizer.ll | 3 +-
.../divergence-temporal-divergent-i1.ll | 3 +-
.../AMDGPU/GlobalISel/vni8-across-blocks.ll | 18 ++---
.../AMDGPU/amdgpu-demote-scc-branches.ll | 27 +++----
.../atomic_optimizations_pixelshader.ll | 36 ++++------
llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 9 +--
.../CodeGen/AMDGPU/branch-condition-and.ll | 3 +-
.../AMDGPU/cgp-addressing-modes-flat.ll | 72 +++++++------------
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll | 12 ++--
.../CodeGen/AMDGPU/insert-skips-gfx10.mir | 4 +-
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 18 ++---
.../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 18 ++---
.../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll | 3 +-
.../CodeGen/AMDGPU/local-atomicrmw-fadd.ll | 65 ++++++-----------
llvm/test/CodeGen/AMDGPU/ret_jump.ll | 1 -
.../AMDGPU/set-inactive-wwm-overwrite.ll | 3 +-
...si-lower-control-flow-unreachable-block.ll | 6 +-
.../AMDGPU/unstructured-cfg-def-use-issue.ll | 12 ++--
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 18 ++---
llvm/test/CodeGen/AMDGPU/wqm.ll | 6 +-
...dgpu_generated_funcs.ll.generated.expected | 6 +-
...pu_generated_funcs.ll.nogenerated.expected | 6 +-
25 files changed, 133 insertions(+), 269 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 1334029544f999..51bd596affc3ae 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -326,9 +326,8 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
return true;
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
- TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
+ // Waitcnt instructions are potentially expensive even if EXEC = 0.
+ if (TII->isWaitcnt(MI.getOpcode()))
return true;
++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
index bdfafa89cd0477..6888d40bf2b4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
@@ -41,13 +41,12 @@ define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_add v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB0_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -87,13 +86,12 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB1_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -139,13 +137,12 @@ define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB2_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_sub v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB2_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -185,13 +182,12 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB3_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB3_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -238,14 +234,13 @@ define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB4_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: s_and_b32 s4, s4, 1
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_xor v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB4_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -287,14 +282,13 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB5_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: s_and_b32 s6, s6, 1
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB5_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -341,13 +335,12 @@ define amdgpu_cs void @atomic_ptr_add(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB6_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_add v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB6_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -389,13 +382,12 @@ define amdgpu_cs void @atomic_ptr_add_and_format(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB7_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB7_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -443,13 +435,12 @@ define amdgpu_cs void @atomic_ptr_sub(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB8_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_sub v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB8_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -491,13 +482,12 @@ define amdgpu_cs void @atomic_ptr_sub_and_format(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB9_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB9_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
@@ -546,14 +536,13 @@ define amdgpu_cs void @atomic_ptr_xor(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s6, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB10_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GCN-NEXT: s_and_b32 s4, s4, 1
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: buffer_atomic_xor v1, v0, s[0:3], 0 idxen
-; GCN-NEXT: .LBB10_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_endpgm
.entry:
call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -597,14 +586,13 @@ define amdgpu_cs void @atomic_ptr_xor_and_format(ptr addrspace(8) inreg %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT: s_cbranch_execz .LBB11_2
; GCN-NEXT: ; %bb.1:
; GCN-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GCN-NEXT: s_and_b32 s6, s6, 1
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT: .LBB11_2:
+; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 s4, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index b27d8fdc24ff73..e08d4f0da86b88 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -249,11 +249,10 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
; GFX10-NEXT: .LBB3_6: ; %Flow1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB3_8
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: flat_store_dword v[3:4], v0
-; GFX10-NEXT: .LBB3_8: ; %exit
+; GFX10-NEXT: ; %bb.8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -315,7 +314,6 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: v_mov_b32_e32 v4, v5
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_4
; GFX10-NEXT: ; %bb.3: ; %if.block.0
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
@@ -323,7 +321,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
; GFX10-NEXT: global_store_dword v[8:9], v4, off
-; GFX10-NEXT: .LBB4_4: ; %loop.break.block
+; GFX10-NEXT: ; %bb.4: ; %loop.break.block
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
@@ -342,10 +340,9 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
; GFX10-NEXT: .LBB4_6: ; %cond.block.1
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
-; GFX10-NEXT: s_cbranch_execz .LBB4_8
; GFX10-NEXT: ; %bb.7: ; %if.block.1
; GFX10-NEXT: global_store_dword v[6:7], v4, off
-; GFX10-NEXT: .LBB4_8: ; %exit
+; GFX10-NEXT: ; %bb.8: ; %exit
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -536,11 +533,10 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB6_6
; GFX10-NEXT: ; %bb.5: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: .LBB6_6: ; %exit
+; GFX10-NEXT: ; %bb.6: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index 1698f84eea5185..50af8cb739e48c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -437,11 +437,10 @@ define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr ad
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB5_6
; GFX10-NEXT: ; %bb.5: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: global_store_dword v[4:5], v0, off
-; GFX10-NEXT: .LBB5_6: ; %exit
+; GFX10-NEXT: ; %bb.6: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
index 1855ede0483def..ea671a7adc7384 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
@@ -152,12 +152,11 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_and_saveexec_b32 s1, s0
; GFX10-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX10-NEXT: s_cbranch_execz .LBB2_7
; GFX10-NEXT: ; %bb.6: ; %break.body
; GFX10-NEXT: v_mov_b32_e32 v0, 10
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: global_store_dword v1, v0, s[2:3]
-; GFX10-NEXT: .LBB2_7: ; %exit
+; GFX10-NEXT: ; %bb.7: ; %exit
; GFX10-NEXT: s_endpgm
entry:
br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index 386e34f72ab734..98d064fcd3c7aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -68,10 +68,9 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dword v1, v2, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB1_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dword v1, v2, s[6:7]
-; GFX906-NEXT: .LBB1_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
@@ -149,10 +148,9 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB3_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[6:7]
-; GFX906-NEXT: .LBB3_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
@@ -185,10 +183,9 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx4 v[1:4], v5, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB4_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v5, s[6:7]
-; GFX906-NEXT: .LBB4_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
@@ -222,11 +219,10 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v9, s[4:5]
; GFX906-NEXT: global_load_dwordx4 v[5:8], v9, s[4:5] offset:16
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB5_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v9, s[6:7]
; GFX906-NEXT: global_load_dwordx4 v[5:8], v9, s[6:7] offset:16
-; GFX906-NEXT: .LBB5_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(1)
@@ -486,14 +482,13 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB8_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX906-NEXT: s_and_b64 s[4:5], exec, vcc
; GFX906-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
-; GFX906-NEXT: .LBB8_2: ; %Flow
+; GFX906-NEXT: ; %bb.2: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
; GFX906-NEXT: s_cbranch_execz .LBB8_4
@@ -547,11 +542,10 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB9_3
; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: global_store_dwordx2 v0, v[3:4], s[8:9]
-; GFX906-NEXT: .LBB9_3: ; %Flow
+; GFX906-NEXT: ; %bb.3: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: .LBB9_4: ; %bb.3
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
index 9319f0d3f5d40f..90bbb4af85e267 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
@@ -150,7 +150,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB3_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
@@ -159,7 +158,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT: .LBB3_2: ; %if.end
+; GFX9-NEXT: ; %bb.2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -169,7 +168,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT: s_cbranch_execz .LBB3_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
@@ -178,7 +176,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT: .LBB3_2: ; %if.end
+; GFX1010-NEXT: ; %bb.2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -189,7 +187,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT: s_cbranch_execz .LBB3_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
@@ -198,7 +195,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT: .LBB3_2: ; %if.end
+; GFX1030-NEXT: ; %bb.2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
@@ -221,7 +218,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB4_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
@@ -230,7 +226,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT: .LBB4_2: ; %if.end
+; GFX9-NEXT: ; %bb.2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -240,7 +236,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT: s_cbranch_execz .LBB4_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
@@ -249,7 +244,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT: .LBB4_2: ; %if.end
+; GFX1010-NEXT: ; %bb.2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -260,7 +255,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT: s_cbranch_execz .LBB4_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
@@ -269,7 +263,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT: .LBB4_2: ; %if.end
+; GFX1030-NEXT: ; %bb.2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
@@ -292,7 +286,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %if.then
; GFX9-NEXT: s_mov_b32 s11, s18
; GFX9-NEXT: s_mov_b32 s10, s17
@@ -301,7 +294,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX9-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_mov_b32_e32 v1, s19
; GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT: .LBB5_2: ; %if.end
+; GFX9-NEXT: ; %bb.2: ; %if.end
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -311,7 +304,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-NEXT: v_cmp_lt_i32_e32 vcc_lo, 0, v0
; GFX1010-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT: s_cbranch_execz .LBB5_2
; GFX1010-NEXT: ; %bb.1: ; %if.then
; GFX1010-NEXT: v_mov_b32_e32 v0, s6
; GFX1010-NEXT: v_mov_b32_e32 v1, s19
@@ -320,7 +312,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX1010-NEXT: s_mov_b32 s9, s16
; GFX1010-NEXT: s_mov_b32 s8, s7
; GFX1010-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT: .LBB5_2: ; %if.end
+; GFX1010-NEXT: ; %bb.2: ; %if.end
; GFX1010-NEXT: s_waitcnt_depctr 0xffe3
; GFX1010-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1010-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -331,7 +323,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_mov_b32 s4, exec_lo
; GFX1030-NEXT: v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT: s_cbranch_execz .LBB5_2
; GFX1030-NEXT: ; %bb.1: ; %if.then
; GFX1030-NEXT: v_mov_b32_e32 v0, s6
; GFX1030-NEXT: v_mov_b32_e32 v1, s19
@@ -340,7 +331,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
; GFX1030-NEXT: s_mov_b32 s9, s16
; GFX1030-NEXT: s_mov_b32 s8, s7
; GFX1030-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT: .LBB5_2: ; %if.end
+; GFX1030-NEXT: ; %bb.2: ; %if.end
; GFX1030-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 429e6c489bf6f8..37443760f1bceb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -27,13 +27,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: ; implicit-def: $vgpr1
; GFX7-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB0_3
; GFX7-NEXT: ; %bb.2:
; GFX7-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
; GFX7-NEXT: s_mul_i32 s12, s12, 5
; GFX7-NEXT: v_mov_b32_e32 v1, s12
; GFX7-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX7-NEXT: .LBB0_3:
+; GFX7-NEXT: ; %bb.3:
; GFX7-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_readfirstlane_b32 s4, v1
@@ -62,13 +61,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX89-NEXT: ; implicit-def: $vgpr1
; GFX89-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX89-NEXT: s_cbranch_execz .LBB0_3
; GFX89-NEXT: ; %bb.2:
; GFX89-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
; GFX89-NEXT: s_mul_i32 s12, s12, 5
; GFX89-NEXT: v_mov_b32_e32 v1, s12
; GFX89-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX89-NEXT: .LBB0_3:
+; GFX89-NEXT: ; %bb.3:
; GFX89-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: v_readfirstlane_b32 s4, v1
@@ -97,13 +95,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, s13, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX1064-NEXT: s_cbranch_execz .LBB0_3
; GFX1064-NEXT: ; %bb.2:
; GFX1064-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
; GFX1064-NEXT: s_mul_i32 s12, s12, 5
; GFX1064-NEXT: v_mov_b32_e32 v1, s12
; GFX1064-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX1064-NEXT: .LBB0_3:
+; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -132,13 +129,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s9, vcc_lo
-; GFX1032-NEXT: s_cbranch_execz .LBB0_3
; GFX1032-NEXT: ; %bb.2:
; GFX1032-NEXT: s_bcnt1_i32_b32 s10, s10
; GFX1032-NEXT: s_mul_i32 s10, s10, 5
; GFX1032-NEXT: v_mov_b32_e32 v1, s10
; GFX1032-NEXT: buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX1032-NEXT: .LBB0_3:
+; GFX1032-NEXT: ; %bb.3:
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -170,14 +166,13 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, s13, v0
; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0
-; GFX1164-NEXT: s_cbranch_execz .LBB0_3
; GFX1164-NEXT: ; %bb.2:
; GFX1164-NEXT: s_bcnt1_i32_b64 s12, s[12:13]
; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1164-NEXT: s_mul_i32 s12, s12, 5
; GFX1164-NEXT: v_mov_b32_e32 v1, s12
; GFX1164-NEXT: buffer_atomic_add_u32 v1, off, s[4:7], 0 glc
-; GFX1164-NEXT: .LBB0_3:
+; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1164-NEXT: s_waitcnt vmcnt(0)
; GFX1164-NEXT: v_readfirstlane_b32 s4, v1
@@ -211,14 +206,13 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
; GFX1132-NEXT: ; implicit-def: $vgpr1
; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0
-; GFX1132-NEXT: s_cbranch_execz .LBB0_3
; GFX1132-NEXT: ; %bb.2:
; GFX1132-NEXT: s_bcnt1_i32_b32 s10, s10
; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1132-NEXT: s_mul_i32 s10, s10, 5
; GFX1132-NEXT: v_mov_b32_e32 v1, s10
; GFX1132-NEXT: buffer_atomic_add_u32 v1, off, s[4:7], 0 glc
-; GFX1132-NEXT: .LBB0_3:
+; GFX1132-NEXT: ; %bb.3:
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_readfirstlane_b32 s4, v1
@@ -298,11 +292,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB1_3
; GFX8-NEXT: ; %bb.2:
; GFX8-NEXT: v_mov_b32_e32 v0, s12
; GFX8-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX8-NEXT: .LBB1_3:
+; GFX8-NEXT: ; %bb.3:
; GFX8-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
@@ -353,11 +346,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB1_3
; GFX9-NEXT: ; %bb.2:
; GFX9-NEXT: v_mov_b32_e32 v0, s12
; GFX9-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX9-NEXT: .LBB1_3:
+; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
@@ -412,11 +404,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX1064-NEXT: s_cbranch_execz .LBB1_3
; GFX1064-NEXT: ; %bb.2:
; GFX1064-NEXT: v_mov_b32_e32 v0, s12
; GFX1064-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX1064-NEXT: .LBB1_3:
+; GFX1064-NEXT: ; %bb.3:
; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
; GFX1064-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1064-NEXT: s_waitcnt vmcnt(0)
@@ -462,11 +453,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s9, vcc_lo
-; GFX1032-NEXT: s_cbranch_execz .LBB1_3
; GFX1032-NEXT: ; %bb.2:
; GFX1032-NEXT: v_mov_b32_e32 v0, s11
; GFX1032-NEXT: buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX1032-NEXT: .LBB1_3:
+; GFX1032-NEXT: ; %bb.3:
; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
; GFX1032-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX1032-NEXT: s_waitcnt vmcnt(0)
@@ -531,11 +521,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1164-NEXT: ; implicit-def: $vgpr0
; GFX1164-NEXT: s_and_saveexec_b64 s[10:11], vcc
-; GFX1164-NEXT: s_cbranch_execz .LBB1_3
; GFX1164-NEXT: ; %bb.2:
; GFX1164-NEXT: v_mov_b32_e32 v0, s12
; GFX1164-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
-; GFX1164-NEXT: .LBB1_3:
+; GFX1164-NEXT: ; %bb.3:
; GFX1164-NEXT: s_or_b64 exec, exec, s[10:11]
; GFX1164-NEXT: s_waitcnt vmcnt(0)
; GFX1164-NEXT: v_readfirstlane_b32 s4, v0
@@ -591,11 +580,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1132-NEXT: ; implicit-def: $vgpr0
; GFX1132-NEXT: s_and_saveexec_b32 s9, vcc_lo
-; GFX1132-NEXT: s_cbranch_execz .LBB1_3
; GFX1132-NEXT: ; %bb.2:
; GFX1132-NEXT: v_mov_b32_e32 v0, s11
; GFX1132-NEXT: buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
-; GFX1132-NEXT: .LBB1_3:
+; GFX1132-NEXT: ; %bb.3:
; GFX1132-NEXT: s_or_b32 exec_lo, exec_lo, s9
; GFX1132-NEXT: s_waitcnt vmcnt(0)
; GFX1132-NEXT: v_readfirstlane_b32 s4, v0
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index 4f0bc512565d13..a93104d978003c 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -153,12 +153,11 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: ; implicit-def: $vgpr3
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
-; GFX90A-NEXT: s_cbranch_execz .LBB1_3
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global
; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: ; implicit-def: $vgpr2
-; GFX90A-NEXT: .LBB1_3: ; %Flow
+; GFX90A-NEXT: ; %bb.3: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB1_5
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.private
@@ -241,12 +240,11 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
; GFX908-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX908-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
-; GFX908-NEXT: s_cbranch_execz .LBB2_5
; GFX908-NEXT: ; %bb.4: ; %atomicrmw.global
; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off
; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX908-NEXT: ; implicit-def: $vgpr2
-; GFX908-NEXT: .LBB2_5: ; %Flow
+; GFX908-NEXT: ; %bb.5: ; %Flow
; GFX908-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GFX908-NEXT: s_cbranch_execz .LBB2_7
; GFX908-NEXT: ; %bb.6: ; %atomicrmw.private
@@ -291,12 +289,11 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7]
-; GFX90A-NEXT: s_cbranch_execz .LBB2_5
; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global
; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off
; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX90A-NEXT: ; implicit-def: $vgpr2
-; GFX90A-NEXT: .LBB2_5: ; %Flow
+; GFX90A-NEXT: ; %bb.5: ; %Flow
; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
; GFX90A-NEXT: s_cbranch_execz .LBB2_7
; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.private
diff --git a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
index cc05129b1b2af6..bb8a69d804b07d 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -14,12 +14,11 @@
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
; GCN: ds_write_b32
-; GCN: .LBB0_{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN: %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index fdae1696a5a492..ba3d6a498c8328 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -76,12 +76,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB0_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 28, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_dword v4, v[2:3]
-; GFX7-NEXT: .LBB0_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -96,12 +95,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB0_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
-; GFX8-NEXT: .LBB0_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -116,10 +114,9 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB0_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: flat_load_dword v4, v[2:3] offset:28
-; GFX9-NEXT: .LBB0_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -134,10 +131,9 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB0_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: flat_load_dword v4, v[2:3] offset:28
-; GFX10-NEXT: .LBB0_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -232,13 +228,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB1_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
-; GFX7-NEXT: .LBB1_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -253,12 +248,11 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB1_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
-; GFX8-NEXT: .LBB1_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -273,10 +267,9 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
-; GFX9-NEXT: .LBB1_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -291,10 +284,9 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
-; GFX10-NEXT: .LBB1_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -345,13 +337,12 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB2_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
-; GFX7-NEXT: .LBB2_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -366,12 +357,11 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB2_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
-; GFX8-NEXT: .LBB2_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -386,10 +376,9 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
-; GFX9-NEXT: .LBB2_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -404,10 +393,9 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
-; GFX10-NEXT: .LBB2_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -513,12 +501,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB3_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0xfff, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX7-NEXT: .LBB3_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -534,12 +521,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB3_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xfff, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX8-NEXT: .LBB3_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -555,10 +541,9 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB3_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: flat_load_sbyte v4, v[2:3] offset:4095
-; GFX9-NEXT: .LBB3_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -574,12 +559,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB3_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x800, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: flat_load_sbyte v4, v[2:3] offset:2047
-; GFX10-NEXT: .LBB3_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -634,12 +618,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB4_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0x1000, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX7-NEXT: .LBB4_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x61a7c, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -655,12 +638,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB4_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX8-NEXT: .LBB4_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x61a7c, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -676,12 +658,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB4_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX9-NEXT: .LBB4_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x61000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -697,12 +678,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB4_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x1000, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: flat_load_sbyte v4, v[2:3]
-; GFX10-NEXT: .LBB4_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x61800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -757,12 +737,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; GFX7-NEXT: v_mov_b32_e32 v6, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT: s_cbranch_execz .LBB5_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX7-NEXT: flat_load_sbyte v6, v[2:3]
-; GFX7-NEXT: .LBB5_2: ; %endif
+; GFX7-NEXT: ; %bb.2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -778,12 +757,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; GFX8-NEXT: v_mov_b32_e32 v6, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB5_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX8-NEXT: flat_load_sbyte v6, v[2:3]
-; GFX8-NEXT: .LBB5_2: ; %endif
+; GFX8-NEXT: ; %bb.2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -799,12 +777,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT: s_cbranch_execz .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: flat_load_sbyte v6, v[2:3]
-; GFX9-NEXT: .LBB5_2: ; %endif
+; GFX9-NEXT: ; %bb.2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -820,12 +797,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB5_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GFX10-NEXT: flat_load_sbyte v6, v[2:3]
-; GFX10-NEXT: .LBB5_2: ; %endif
+; GFX10-NEXT: ; %bb.2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 67a084068941a4..840679be252aa5 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -192,13 +192,12 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: buffer_store_dword v4, v[3:4], s[0:3], 0 addr64
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_3
; GCN-NEXT: ; %bb.2: ; %bb.inner.then
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:4
-; GCN-NEXT: .LBB1_3: ; %bb.inner.end
+; GCN-NEXT: ; %bb.3: ; %bb.inner.end
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: s_mov_b32 s1, s2
@@ -397,7 +396,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 2, v0
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GCN-NEXT: s_cbranch_execz .LBB2_3
; GCN-NEXT: ; %bb.2: ; %bb.else
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s7, 0xf000
@@ -406,7 +404,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:8
; GCN-NEXT: ; implicit-def: $vgpr3_vgpr4
-; GCN-NEXT: .LBB2_3: ; %Flow
+; GCN-NEXT: ; %bb.3: ; %Flow
; GCN-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; GCN-NEXT: s_cbranch_execz .LBB2_5
; GCN-NEXT: ; %bb.4: ; %bb.then
@@ -635,7 +633,6 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:12
; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc
-; GCN-NEXT: s_cbranch_execz .LBB3_3
; GCN-NEXT: ; %bb.2: ; %bb.inner.then2
; GCN-NEXT: s_mov_b32 s10, 0
; GCN-NEXT: s_mov_b32 s11, 0xf000
@@ -643,7 +640,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: s_mov_b32 s9, s10
; GCN-NEXT: v_mov_b32_e32 v0, 4
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[8:11], 0 addr64 offset:16
-; GCN-NEXT: .LBB3_3: ; %Flow
+; GCN-NEXT: ; %bb.3: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: ; implicit-def: $vgpr0
@@ -658,11 +655,10 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 2, v0
; GCN-NEXT: buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:4
; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT: s_cbranch_execz .LBB3_7
; GCN-NEXT: ; %bb.6: ; %bb.inner.then
; GCN-NEXT: v_mov_b32_e32 v0, 2
; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
-; GCN-NEXT: .LBB3_7: ; %Flow1
+; GCN-NEXT: ; %bb.7: ; %Flow1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: .LBB3_8: ; %bb.outer.end
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
index b4ed3cafbacb5f..4d8758e87021f7 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
@@ -156,9 +156,7 @@ name: skip_bvh
body: |
; CHECK-LABEL: name: skip_bvh
; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: successors: %bb.1(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 7371d498a70706..6cc2393d598e04 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -539,11 +539,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -611,11 +610,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -681,11 +679,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -751,11 +748,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_load_dword s1, s[2:3], 0x2c
; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -823,11 +819,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_load_b32 s4, s[2:3], 0x2c
; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -903,11 +898,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[2:3], 0x2c
; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 60af21524a04a1..f7f8536219db6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -540,11 +540,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX8DAGISEL-NEXT: ; %bb.1: ; %else
; GFX8DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX8DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -612,11 +611,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX9DAGISEL-NEXT: ; %bb.1: ; %else
; GFX9DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX9DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -682,11 +680,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr4
; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1064DAGISEL-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1064DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -752,11 +749,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1
; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1032DAGISEL-NEXT: s_load_dword s1, s[2:3], 0x2c
; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1032DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -824,11 +820,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1164DAGISEL-NEXT: s_load_b32 s4, s[2:3], 0x2c
; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1164DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s4
@@ -904,11 +899,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0
; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_2
; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else
; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[2:3], 0x2c
; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT: .LBB4_2: ; %Flow
+; GFX1132DAGISEL-NEXT: ; %bb.2: ; %Flow
; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
index 5fb50d7e8589a7..9765d35dc50d83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -178,10 +178,9 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc
; CHECK-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; CHECK-NEXT: s_cbranch_execz .LBB6_2
; CHECK-NEXT: ; %bb.1: ; %ELSE
; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 idxen
-; CHECK-NEXT: .LBB6_2: ; %Flow
+; CHECK-NEXT: ; %bb.2: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; CHECK-NEXT: s_cbranch_execz .LBB6_4
; CHECK-NEXT: ; %bb.3: ; %IF
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
index 295ae94902da73..569a0f52df738f 100644
--- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
@@ -7923,7 +7923,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_add_co_i32 s1, s5, 4
; GFX12-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX12-NEXT: s_cbranch_execz .LBB29_2
; GFX12-NEXT: ; %bb.1:
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_bcnt1_i32_b32 s5, s6
@@ -7933,7 +7932,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mul_f32 v1, 0x42280000, v1
; GFX12-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX12-NEXT: .LBB29_2:
+; GFX12-NEXT: ; %bb.2:
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_mov_b32 s7, exec_lo
@@ -7944,7 +7943,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: s_mov_b32 s6, exec_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_cmpx_eq_u32_e32 0, v2
-; GFX12-NEXT: s_cbranch_execz .LBB29_4
; GFX12-NEXT: ; %bb.3:
; GFX12-NEXT: s_bcnt1_i32_b32 s0, s7
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
@@ -7953,8 +7951,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mul_f32 v1, 0x42280000, v1
; GFX12-NEXT: ds_add_f32 v2, v1
-; GFX12-NEXT: .LBB29_4:
-; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: ; %bb.4:
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s6
; GFX12-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX12-NEXT: s_mov_b32 s1, exec_lo
@@ -7988,11 +7985,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX12-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_xor_b32 s1, exec_lo, s1
-; GFX12-NEXT: s_cbranch_execz .LBB29_8
; GFX12-NEXT: ; %bb.7:
; GFX12-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s0
; GFX12-NEXT: ds_add_rtn_f32 v1, v1, v2
-; GFX12-NEXT: .LBB29_8:
+; GFX12-NEXT: ; %bb.8:
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
@@ -8018,7 +8014,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX940-NEXT: s_add_i32 s5, s5, 4
; GFX940-NEXT: ; implicit-def: $vgpr1
; GFX940-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GFX940-NEXT: s_cbranch_execz .LBB29_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX940-NEXT: s_lshl_b32 s8, s5, 3
@@ -8026,7 +8021,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX940-NEXT: v_mov_b32_e32 v2, s8
; GFX940-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX940-NEXT: .LBB29_2:
+; GFX940-NEXT: ; %bb.2:
; GFX940-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX940-NEXT: s_mov_b64 s[8:9], exec
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
@@ -8035,7 +8030,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1
; GFX940-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1
; GFX940-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX940-NEXT: s_cbranch_execz .LBB29_4
; GFX940-NEXT: ; %bb.3:
; GFX940-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
@@ -8043,7 +8037,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX940-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX940-NEXT: v_mov_b32_e32 v2, s0
; GFX940-NEXT: ds_add_f32 v2, v1
-; GFX940-NEXT: .LBB29_4:
+; GFX940-NEXT: ; %bb.4:
; GFX940-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX940-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX940-NEXT: v_mul_f32_e32 v0, 0x42280000, v0
@@ -8072,11 +8066,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX940-NEXT: ; implicit-def: $vgpr2
; GFX940-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX940-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT: s_cbranch_execz .LBB29_8
; GFX940-NEXT: ; %bb.7:
; GFX940-NEXT: v_mov_b32_e32 v2, s4
; GFX940-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX940-NEXT: .LBB29_8:
+; GFX940-NEXT: ; %bb.8:
; GFX940-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
@@ -8100,7 +8093,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s1, s5, 4
; GFX11-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX11-NEXT: s_cbranch_execz .LBB29_2
; GFX11-NEXT: ; %bb.1:
; GFX11-NEXT: s_bcnt1_i32_b32 s5, s6
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -8109,7 +8101,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mul_f32 v1, 0x42280000, v1
; GFX11-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX11-NEXT: .LBB29_2:
+; GFX11-NEXT: ; %bb.2:
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT: s_mov_b32 s7, exec_lo
@@ -8118,7 +8110,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
; GFX11-NEXT: s_mov_b32 s6, exec_lo
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v2
-; GFX11-NEXT: s_cbranch_execz .LBB29_4
; GFX11-NEXT: ; %bb.3:
; GFX11-NEXT: s_bcnt1_i32_b32 s0, s7
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -8127,7 +8118,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mul_f32 v1, 0x42280000, v1
; GFX11-NEXT: ds_add_f32 v2, v1
-; GFX11-NEXT: .LBB29_4:
+; GFX11-NEXT: ; %bb.4:
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s6
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT: v_bfrev_b32_e32 v1, 1
@@ -8159,11 +8150,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX11-NEXT: ; implicit-def: $vgpr2
; GFX11-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX11-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT: s_cbranch_execz .LBB29_8
; GFX11-NEXT: ; %bb.7:
; GFX11-NEXT: v_mov_b32_e32 v2, s4
; GFX11-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX11-NEXT: .LBB29_8:
+; GFX11-NEXT: ; %bb.8:
; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
@@ -8186,7 +8176,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s1, s5, 4
; GFX10-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GFX10-NEXT: s_cbranch_execz .LBB29_2
; GFX10-NEXT: ; %bb.1:
; GFX10-NEXT: s_bcnt1_i32_b32 s5, s6
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, s5
@@ -8194,7 +8183,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: v_mov_b32_e32 v2, s5
; GFX10-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX10-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX10-NEXT: .LBB29_2:
+; GFX10-NEXT: ; %bb.2:
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_mov_b32 s7, exec_lo
@@ -8203,7 +8192,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v2, s7, 0
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
; GFX10-NEXT: s_and_saveexec_b32 s6, s0
-; GFX10-NEXT: s_cbranch_execz .LBB29_4
; GFX10-NEXT: ; %bb.3:
; GFX10-NEXT: s_bcnt1_i32_b32 s0, s7
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
@@ -8211,7 +8199,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX10-NEXT: ds_add_f32 v2, v1
-; GFX10-NEXT: .LBB29_4:
+; GFX10-NEXT: ; %bb.4:
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s6
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
@@ -8238,11 +8226,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: ; implicit-def: $vgpr2
; GFX10-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT: s_cbranch_execz .LBB29_8
; GFX10-NEXT: ; %bb.7:
; GFX10-NEXT: v_mov_b32_e32 v2, s4
; GFX10-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX10-NEXT: .LBB29_8:
+; GFX10-NEXT: ; %bb.8:
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
@@ -8251,7 +8238,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_add_f32_e32 v0, s2, v0
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s2, vcc_lo
-; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -8266,7 +8252,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: s_add_i32 s5, s5, 4
; GFX90A-NEXT: ; implicit-def: $vgpr1
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GFX90A-NEXT: s_cbranch_execz .LBB29_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX90A-NEXT: s_lshl_b32 s8, s5, 3
@@ -8274,7 +8259,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, s8
; GFX90A-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX90A-NEXT: .LBB29_2:
+; GFX90A-NEXT: ; %bb.2:
; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX90A-NEXT: s_mov_b64 s[8:9], exec
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
@@ -8283,7 +8268,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1
; GFX90A-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1
; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX90A-NEXT: s_cbranch_execz .LBB29_4
; GFX90A-NEXT: ; %bb.3:
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
; GFX90A-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
@@ -8291,7 +8275,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX90A-NEXT: v_mov_b32_e32 v2, s0
; GFX90A-NEXT: ds_add_f32 v2, v1
-; GFX90A-NEXT: .LBB29_4:
+; GFX90A-NEXT: ; %bb.4:
; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX90A-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX90A-NEXT: v_mul_f32_e32 v0, 0x42280000, v0
@@ -8320,11 +8304,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX90A-NEXT: ; implicit-def: $vgpr2
; GFX90A-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX90A-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX90A-NEXT: s_cbranch_execz .LBB29_8
; GFX90A-NEXT: ; %bb.7:
; GFX90A-NEXT: v_mov_b32_e32 v2, s4
; GFX90A-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX90A-NEXT: .LBB29_8:
+; GFX90A-NEXT: ; %bb.8:
; GFX90A-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
@@ -8347,7 +8330,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX908-NEXT: s_add_i32 s5, s5, 4
; GFX908-NEXT: ; implicit-def: $vgpr1
; GFX908-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GFX908-NEXT: s_cbranch_execz .LBB29_2
; GFX908-NEXT: ; %bb.1:
; GFX908-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX908-NEXT: s_lshl_b32 s8, s5, 3
@@ -8355,7 +8337,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX908-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX908-NEXT: v_mov_b32_e32 v2, s8
; GFX908-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX908-NEXT: .LBB29_2:
+; GFX908-NEXT: ; %bb.2:
; GFX908-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX908-NEXT: s_mov_b64 s[8:9], exec
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
@@ -8364,7 +8346,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX908-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1
; GFX908-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1
; GFX908-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX908-NEXT: s_cbranch_execz .LBB29_4
; GFX908-NEXT: ; %bb.3:
; GFX908-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
; GFX908-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
@@ -8372,7 +8353,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX908-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX908-NEXT: v_mov_b32_e32 v2, s0
; GFX908-NEXT: ds_add_f32 v2, v1
-; GFX908-NEXT: .LBB29_4:
+; GFX908-NEXT: ; %bb.4:
; GFX908-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX908-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX908-NEXT: v_mul_f32_e32 v0, 0x42280000, v0
@@ -8401,11 +8382,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX908-NEXT: ; implicit-def: $vgpr2
; GFX908-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX908-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX908-NEXT: s_cbranch_execz .LBB29_8
; GFX908-NEXT: ; %bb.7:
; GFX908-NEXT: v_mov_b32_e32 v2, s4
; GFX908-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX908-NEXT: .LBB29_8:
+; GFX908-NEXT: ; %bb.8:
; GFX908-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
@@ -8429,7 +8409,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX8-NEXT: ; implicit-def: $vgpr1
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; GFX8-NEXT: s_cbranch_execz .LBB29_2
; GFX8-NEXT: ; %bb.1:
; GFX8-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
; GFX8-NEXT: s_lshl_b32 s8, s5, 3
@@ -8437,7 +8416,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX8-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX8-NEXT: v_mov_b32_e32 v2, s8
; GFX8-NEXT: ds_add_rtn_f32 v1, v2, v1
-; GFX8-NEXT: .LBB29_2:
+; GFX8-NEXT: ; %bb.2:
; GFX8-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX8-NEXT: s_mov_b64 s[8:9], exec
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -8446,7 +8425,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX8-NEXT: v_mbcnt_hi_u32_b32 v1, s9, v1
; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v1
; GFX8-NEXT: s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX8-NEXT: s_cbranch_execz .LBB29_4
; GFX8-NEXT: ; %bb.3:
; GFX8-NEXT: s_bcnt1_i32_b64 s0, s[8:9]
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
@@ -8454,7 +8432,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX8-NEXT: v_mul_f32_e32 v1, 0x42280000, v1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: ds_add_f32 v2, v1
-; GFX8-NEXT: .LBB29_4:
+; GFX8-NEXT: ; %bb.4:
; GFX8-NEXT: s_or_b64 exec, exec, s[6:7]
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, 0x42280000, v0
@@ -8483,12 +8461,11 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
; GFX8-NEXT: ; implicit-def: $vgpr2
; GFX8-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX8-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8-NEXT: s_cbranch_execz .LBB29_8
; GFX8-NEXT: ; %bb.7:
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_add_rtn_f32 v2, v2, v1
-; GFX8-NEXT: .LBB29_8:
+; GFX8-NEXT: ; %bb.8:
; GFX8-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ad38d78ddb2ff1..66a55d9eb128c6 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -65,7 +65,6 @@ ret.bb: ; preds = %else, %main_body
; GCN: .LBB{{[0-9]+_[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
; GCN-NEXT: ; %unreachable.bb
; GCN: ds_write_b32
diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
index 6f841c88a6d8bb..8b65aeb5836913 100644
--- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
@@ -13,7 +13,6 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 3, v0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
-; GCN-NEXT: s_cbranch_execz .LBB0_4
; GCN-NEXT: ; %bb.3: ; %.then
; GCN-NEXT: s_or_saveexec_b32 s1, -1
; GCN-NEXT: v_cndmask_b32_e64 v1, 0, v3, s1
@@ -24,7 +23,7 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
; GCN-NEXT: v_mov_b32_e32 v4, -1
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: buffer_store_dword v4, v0, s[4:7], 0 offen
-; GCN-NEXT: .LBB0_4: ; %.end
+; GCN-NEXT: ; %bb.4: ; %.end
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, -1
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 13745d4d5b171d..aec0189c0581ae 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -3,13 +3,12 @@
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
; GCN: v_cmp_eq_u32
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN-NEXT: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN-NEXT: %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
; GCN: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
@@ -29,13 +28,12 @@ ret:
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
; GCN: v_cmp_ne_u32
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN: BB1_{{[0-9]+}}:
+; GCN: %bb.{{[0-9]+}}:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index a5e1506114f2d0..ec48d4620849be 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -77,13 +77,12 @@ define hidden void @widget() {
; GCN-NEXT: .LBB0_4: ; %Flow2
; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[46:47]
; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19]
-; GCN-NEXT: s_cbranch_execz .LBB0_6
; GCN-NEXT: ; %bb.5: ; %bb12
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_store_dword v[0:1], v2
-; GCN-NEXT: .LBB0_6: ; %Flow3
+; GCN-NEXT: ; %bb.6: ; %Flow3
; GCN-NEXT: s_or_b64 exec, exec, s[18:19]
; GCN-NEXT: s_andn2_b64 vcc, exec, s[16:17]
; GCN-NEXT: s_cbranch_vccnz .LBB0_8
@@ -359,23 +358,21 @@ define hidden void @blam() {
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[8:9], s[52:53]
; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[42:43]
-; GCN-NEXT: s_cbranch_execz .LBB1_7
; GCN-NEXT: ; %bb.6: ; %bb16
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
; GCN-NEXT: s_or_b64 s[8:9], s[52:53], exec
-; GCN-NEXT: .LBB1_7: ; %Flow3
+; GCN-NEXT: ; %bb.7: ; %Flow3
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[8:9]
; GCN-NEXT: s_xor_b64 s[8:9], exec, s[10:11]
-; GCN-NEXT: s_cbranch_execz .LBB1_9
; GCN-NEXT: ; %bb.8: ; %bb17
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], 0
-; GCN-NEXT: .LBB1_9: ; %Flow4
+; GCN-NEXT: ; %bb.9: ; %Flow4
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec
@@ -401,12 +398,11 @@ define hidden void @blam() {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_mov_b64 s[10:11], s[6:7]
; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_15
; GCN-NEXT: ; %bb.14: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0
; GCN-NEXT: s_or_b64 s[10:11], s[6:7], exec
-; GCN-NEXT: .LBB1_15: ; %Flow6
+; GCN-NEXT: ; %bb.15: ; %Flow6
; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[12:13]
; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index 2d5e5a9160fdf7..0c1852f4042542 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -60,10 +60,9 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dword v2, v3, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB1_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dword v2, v3, s[6:7]
-; GFX906-NEXT: .LBB1_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dword v1, v2, s[0:1]
@@ -136,10 +135,9 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v4, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB3_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v4, s[6:7]
-; GFX906-NEXT: .LBB3_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dwordx2 v3, v[1:2], s[0:1]
@@ -172,10 +170,9 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx4 v[1:4], v6, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB4_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v6, s[6:7]
-; GFX906-NEXT: .LBB4_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dwordx4 v5, v[1:4], s[0:1]
@@ -209,11 +206,10 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v10, s[4:5] offset:16
; GFX906-NEXT: global_load_dwordx4 v[5:8], v10, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB5_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx4 v[1:4], v10, s[6:7] offset:16
; GFX906-NEXT: global_load_dwordx4 v[5:8], v10, s[6:7]
-; GFX906-NEXT: .LBB5_2: ; %bb.2
+; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_waitcnt vmcnt(1)
; GFX906-NEXT: global_store_dwordx4 v9, v[1:4], s[0:1] offset:16
@@ -412,14 +408,13 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB8_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX906-NEXT: s_and_b64 s[4:5], vcc, exec
; GFX906-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
-; GFX906-NEXT: .LBB8_2: ; %Flow
+; GFX906-NEXT: ; %bb.2: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
; GFX906-NEXT: s_cbranch_execz .LBB8_4
@@ -646,11 +641,10 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
; GFX906-NEXT: global_load_dwordx2 v[1:2], v6, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT: s_cbranch_execz .LBB11_3
; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: global_store_dwordx2 v0, v[3:4], s[8:9]
-; GFX906-NEXT: .LBB11_3: ; %Flow
+; GFX906-NEXT: ; %bb.3: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: .LBB11_4: ; %bb.3
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index ab84c0c905771b..613a742e89b1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1361,13 +1361,12 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX9-W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
; GFX9-W64-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX9-W64-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
-; GFX9-W64-NEXT: s_cbranch_execz .LBB27_2
; GFX9-W64-NEXT: ; %bb.1: ; %ELSE
; GFX9-W64-NEXT: s_and_saveexec_b64 s[16:17], s[12:13]
; GFX9-W64-NEXT: buffer_store_dword v2, v0, s[0:3], 0 idxen
; GFX9-W64-NEXT: ; implicit-def: $vgpr0
; GFX9-W64-NEXT: s_mov_b64 exec, s[16:17]
-; GFX9-W64-NEXT: .LBB27_2: ; %Flow
+; GFX9-W64-NEXT: ; %bb.2: ; %Flow
; GFX9-W64-NEXT: s_andn2_saveexec_b64 s[14:15], s[14:15]
; GFX9-W64-NEXT: s_cbranch_execz .LBB27_4
; GFX9-W64-NEXT: ; %bb.3: ; %IF
@@ -1388,13 +1387,12 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
; GFX10-W32-NEXT: s_mov_b32 s13, exec_lo
; GFX10-W32-NEXT: v_cmpx_ne_u32_e32 0, v1
; GFX10-W32-NEXT: s_xor_b32 s13, exec_lo, s13
-; GFX10-W32-NEXT: s_cbranch_execz .LBB27_2
; GFX10-W32-NEXT: ; %bb.1: ; %ELSE
; GFX10-W32-NEXT: s_and_saveexec_b32 s14, s12
; GFX10-W32-NEXT: buffer_store_dword v2, v0, s[0:3], 0 idxen
; GFX10-W32-NEXT: ; implicit-def: $vgpr0
; GFX10-W32-NEXT: s_mov_b32 exec_lo, s14
-; GFX10-W32-NEXT: .LBB27_2: ; %Flow
+; GFX10-W32-NEXT: ; %bb.2: ; %Flow
; GFX10-W32-NEXT: s_andn2_saveexec_b32 s13, s13
; GFX10-W32-NEXT: s_cbranch_execz .LBB27_4
; GFX10-W32-NEXT: ; %bb.3: ; %IF
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index d1500e002d7e92..32ffe92b942cea 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -86,19 +86,17 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
-; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: .LBB0_2: ; %Flow
+; CHECK-NEXT: ; %bb.2: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_addk_i32 s32, 0xfa00
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index deadc4adb02c5e..77b62e6a7b0cbc 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -27,19 +27,17 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; CHECK-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
-; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT: .LBB0_2: ; %Flow
+; CHECK-NEXT: ; %bb.2: ; %Flow
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
-; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: ; %bb.4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_addk_i32 s32, 0xfa00
More information about the llvm-commits
mailing list