[llvm] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: accept memory instructions in the "then" block (PR #109995)

Juan Manuel Martinez CaamaƱo via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 07:58:34 PDT 2024


https://github.com/jmmartinez updated https://github.com/llvm/llvm-project/pull/109995

>From 6656481eb291d663d6c85d0f7e41f24200e55409 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Wed, 25 Sep 2024 16:01:40 +0200
Subject: [PATCH] [AMDGPU][SIPreEmitPeephole] mustRetainExeczBranch: accept
 memory instructions in the "then" block

---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp  |  5 +-
 .../atomic_optimizations_mul_one.ll           | 36 ++++------
 ...vergence-divergent-i1-used-outside-loop.ll | 12 ++--
 .../GlobalISel/divergence-structurizer.ll     |  3 +-
 .../divergence-temporal-divergent-i1.ll       |  3 +-
 .../AMDGPU/GlobalISel/vni8-across-blocks.ll   | 18 ++---
 .../AMDGPU/amdgpu-demote-scc-branches.ll      | 27 +++----
 .../atomic_optimizations_pixelshader.ll       | 36 ++++------
 llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll  |  9 +--
 .../CodeGen/AMDGPU/branch-condition-and.ll    |  3 +-
 .../AMDGPU/cgp-addressing-modes-flat.ll       | 72 +++++++------------
 llvm/test/CodeGen/AMDGPU/collapse-endcf.ll    | 12 ++--
 .../CodeGen/AMDGPU/insert-skips-gfx10.mir     |  4 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll | 18 ++---
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll | 18 ++---
 .../CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll     |  3 +-
 .../CodeGen/AMDGPU/local-atomicrmw-fadd.ll    | 65 ++++++-----------
 llvm/test/CodeGen/AMDGPU/ret_jump.ll          |  1 -
 .../AMDGPU/set-inactive-wwm-overwrite.ll      |  3 +-
 ...si-lower-control-flow-unreachable-block.ll |  6 +-
 .../AMDGPU/unstructured-cfg-def-use-issue.ll  | 12 ++--
 .../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 18 ++---
 llvm/test/CodeGen/AMDGPU/wqm.ll               |  6 +-
 ...dgpu_generated_funcs.ll.generated.expected |  6 +-
 ...pu_generated_funcs.ll.nogenerated.expected |  6 +-
 25 files changed, 133 insertions(+), 269 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 1334029544f999..51bd596affc3ae 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -326,9 +326,8 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
       if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
         return true;
 
-      // These instructions are potentially expensive even if EXEC = 0.
-      if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
-          TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
+      // Waitcnt instructions are potentially expensive even if EXEC = 0.
+      if (TII->isWaitcnt(MI.getOpcode()))
         return true;
 
       ++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
index bdfafa89cd0477..6888d40bf2b4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
@@ -41,13 +41,12 @@ define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_add v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB0_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -87,13 +86,12 @@ define amdgpu_cs void @atomic_add_and_format(<4 x i32> inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB1_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB1_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
@@ -139,13 +137,12 @@ define amdgpu_cs void @atomic_sub(<4 x i32> inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB2_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_sub v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB2_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -185,13 +182,12 @@ define amdgpu_cs void @atomic_sub_and_format(<4 x i32> inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB3_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB3_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
@@ -238,14 +234,13 @@ define amdgpu_cs void @atomic_xor(<4 x i32> inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB4_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    s_and_b32 s4, s4, 1
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_xor v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB4_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32 1, <4 x i32> %arg, i32 0, i32 0, i32 0, i32 0)
@@ -287,14 +282,13 @@ define amdgpu_cs void @atomic_xor_and_format(<4 x i32> inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB5_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    s_and_b32 s6, s6, 1
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB5_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
@@ -341,13 +335,12 @@ define amdgpu_cs void @atomic_ptr_add(ptr addrspace(8) inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB6_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_add v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB6_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -389,13 +382,12 @@ define amdgpu_cs void @atomic_ptr_add_and_format(ptr addrspace(8) inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB7_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_add v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB7_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
@@ -443,13 +435,12 @@ define amdgpu_cs void @atomic_ptr_sub(ptr addrspace(8) inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB8_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_sub v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB8_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -491,13 +482,12 @@ define amdgpu_cs void @atomic_ptr_sub_and_format(ptr addrspace(8) inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB9_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_sub v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB9_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
@@ -546,14 +536,13 @@ define amdgpu_cs void @atomic_ptr_xor(ptr addrspace(8) inreg %arg)  {
 ; GCN-NEXT:    v_mbcnt_hi_u32_b32_e32 v0, s6, v0
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB10_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s4, s[4:5]
 ; GCN-NEXT:    s_and_b32 s4, s4, 1
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s4
 ; GCN-NEXT:    buffer_atomic_xor v1, v0, s[0:3], 0 idxen
-; GCN-NEXT:  .LBB10_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_endpgm
 .entry:
   call i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32 1, ptr addrspace(8) %arg, i32 0, i32 0, i32 0, i32 0)
@@ -597,14 +586,13 @@ define amdgpu_cs void @atomic_ptr_xor_and_format(ptr addrspace(8) inreg %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    ; implicit-def: $vgpr1
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB11_2
 ; GCN-NEXT:  ; %bb.1:
 ; GCN-NEXT:    s_bcnt1_i32_b64 s6, s[6:7]
 ; GCN-NEXT:    s_and_b32 s6, s6, 1
 ; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    buffer_atomic_xor v1, v2, s[0:3], 0 idxen glc
-; GCN-NEXT:  .LBB11_2:
+; GCN-NEXT:  ; %bb.2:
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_readfirstlane_b32 s4, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
index b27d8fdc24ff73..e08d4f0da86b88 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll
@@ -249,11 +249,10 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
 ; GFX10-NEXT:  .LBB3_6: ; %Flow1
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, s6
-; GFX10-NEXT:    s_cbranch_execz .LBB3_8
 ; GFX10-NEXT:  ; %bb.7: ; %block.after.loop
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 5
 ; GFX10-NEXT:    flat_store_dword v[3:4], v0
-; GFX10-NEXT:  .LBB3_8: ; %exit
+; GFX10-NEXT:  ; %bb.8: ; %exit
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
@@ -315,7 +314,6 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
 ; GFX10-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v0, v4
 ; GFX10-NEXT:    s_and_saveexec_b32 s7, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB4_4
 ; GFX10-NEXT:  ; %bb.3: ; %if.block.0
 ; GFX10-NEXT:    ; in Loop: Header=BB4_2 Depth=1
 ; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
@@ -323,7 +321,7 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
 ; GFX10-NEXT:    v_add_co_u32 v8, s4, v2, v8
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
 ; GFX10-NEXT:    global_store_dword v[8:9], v4, off
-; GFX10-NEXT:  .LBB4_4: ; %loop.break.block
+; GFX10-NEXT:  ; %bb.4: ; %loop.break.block
 ; GFX10-NEXT:    ; in Loop: Header=BB4_2 Depth=1
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s7
@@ -342,10 +340,9 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
 ; GFX10-NEXT:  .LBB4_6: ; %cond.block.1
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, s6
-; GFX10-NEXT:    s_cbranch_execz .LBB4_8
 ; GFX10-NEXT:  ; %bb.7: ; %if.block.1
 ; GFX10-NEXT:    global_store_dword v[6:7], v4, off
-; GFX10-NEXT:  .LBB4_8: ; %exit
+; GFX10-NEXT:  ; %bb.8: ; %exit
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -536,11 +533,10 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX10-NEXT:    s_and_saveexec_b32 s0, s1
 ; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_execz .LBB6_6
 ; GFX10-NEXT:  ; %bb.5: ; %break.body
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 10
 ; GFX10-NEXT:    global_store_dword v[4:5], v0, off
-; GFX10-NEXT:  .LBB6_6: ; %exit
+; GFX10-NEXT:  ; %bb.6: ; %exit
 ; GFX10-NEXT:    s_endpgm
 entry:
   br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
index 1698f84eea5185..50af8cb739e48c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll
@@ -437,11 +437,10 @@ define amdgpu_cs void @loop_with_div_break_with_body(ptr addrspace(1) %x, ptr ad
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX10-NEXT:    s_and_saveexec_b32 s0, s1
 ; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_execz .LBB5_6
 ; GFX10-NEXT:  ; %bb.5: ; %break.body
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 10
 ; GFX10-NEXT:    global_store_dword v[4:5], v0, off
-; GFX10-NEXT:  .LBB5_6: ; %exit
+; GFX10-NEXT:  ; %bb.6: ; %exit
 ; GFX10-NEXT:    s_endpgm
 entry:
   br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
index 1855ede0483def..ea671a7adc7384 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
@@ -152,12 +152,11 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, i32 %x.size, ptr ad
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    s_and_saveexec_b32 s1, s0
 ; GFX10-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX10-NEXT:    s_cbranch_execz .LBB2_7
 ; GFX10-NEXT:  ; %bb.6: ; %break.body
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 10
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
-; GFX10-NEXT:  .LBB2_7: ; %exit
+; GFX10-NEXT:  ; %bb.7: ; %exit
 ; GFX10-NEXT:    s_endpgm
 entry:
   br label %A
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
index 386e34f72ab734..98d064fcd3c7aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
@@ -68,10 +68,9 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dword v1, v2, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dword v1, v2, s[6:7]
-; GFX906-NEXT:  .LBB1_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
@@ -149,10 +148,9 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[6:7]
-; GFX906-NEXT:  .LBB3_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
@@ -185,10 +183,9 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v5, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v5, s[6:7]
-; GFX906-NEXT:  .LBB4_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
@@ -222,11 +219,10 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v9, s[4:5]
 ; GFX906-NEXT:    global_load_dwordx4 v[5:8], v9, s[4:5] offset:16
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v9, s[6:7]
 ; GFX906-NEXT:    global_load_dwordx4 v[5:8], v9, s[6:7] offset:16
-; GFX906-NEXT:  .LBB5_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    s_waitcnt vmcnt(1)
@@ -486,14 +482,13 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB8_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[6:7]
 ; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; GFX906-NEXT:    s_and_b64 s[4:5], exec, vcc
 ; GFX906-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
-; GFX906-NEXT:  .LBB8_2: ; %Flow
+; GFX906-NEXT:  ; %bb.2: ; %Flow
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], s[0:1]
 ; GFX906-NEXT:    s_cbranch_execz .LBB8_4
@@ -547,11 +542,10 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v5, s[6:7]
 ; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB9_3
 ; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    global_store_dwordx2 v0, v[3:4], s[8:9]
-; GFX906-NEXT:  .LBB9_3: ; %Flow
+; GFX906-NEXT:  ; %bb.3: ; %Flow
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:  .LBB9_4: ; %bb.3
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
index 9319f0d3f5d40f..90bbb4af85e267 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll
@@ -150,7 +150,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
 ; GFX9-NEXT:    s_mov_b32 s11, s18
 ; GFX9-NEXT:    s_mov_b32 s10, s17
@@ -159,7 +158,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX9-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT:  .LBB3_2: ; %if.end
+; GFX9-NEXT:  ; %bb.2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -169,7 +168,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
 ; GFX1010-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1010-NEXT:    v_mov_b32_e32 v1, s19
@@ -178,7 +176,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-NEXT:    s_mov_b32 s9, s16
 ; GFX1010-NEXT:    s_mov_b32 s8, s7
 ; GFX1010-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT:  .LBB3_2: ; %if.end
+; GFX1010-NEXT:  ; %bb.2: ; %if.end
 ; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1010-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -189,7 +187,6 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, s19
@@ -198,7 +195,7 @@ define void @divergent_br_no_metadata(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030-NEXT:    s_mov_b32 s9, s16
 ; GFX1030-NEXT:    s_mov_b32 s8, s7
 ; GFX1030-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT:  .LBB3_2: ; %if.end
+; GFX1030-NEXT:  ; %bb.2: ; %if.end
 ; GFX1030-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_setpc_b64 s[30:31]
@@ -221,7 +218,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
 ; GFX9-NEXT:    s_mov_b32 s11, s18
 ; GFX9-NEXT:    s_mov_b32 s10, s17
@@ -230,7 +226,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX9-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT:  .LBB4_2: ; %if.end
+; GFX9-NEXT:  ; %bb.2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -240,7 +236,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
 ; GFX1010-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1010-NEXT:    v_mov_b32_e32 v1, s19
@@ -249,7 +244,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX1010-NEXT:    s_mov_b32 s9, s16
 ; GFX1010-NEXT:    s_mov_b32 s8, s7
 ; GFX1010-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT:  .LBB4_2: ; %if.end
+; GFX1010-NEXT:  ; %bb.2: ; %if.end
 ; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1010-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -260,7 +255,6 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, s19
@@ -269,7 +263,7 @@ define void @divergent_br_unprofitable(i32 noundef inreg %value, ptr addrspace(8
 ; GFX1030-NEXT:    s_mov_b32 s9, s16
 ; GFX1030-NEXT:    s_mov_b32 s8, s7
 ; GFX1030-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT:  .LBB4_2: ; %if.end
+; GFX1030-NEXT:  ; %bb.2: ; %if.end
 ; GFX1030-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_setpc_b64 s[30:31]
@@ -292,7 +286,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, 0, v0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9-NEXT:  ; %bb.1: ; %if.then
 ; GFX9-NEXT:    s_mov_b32 s11, s18
 ; GFX9-NEXT:    s_mov_b32 s10, s17
@@ -301,7 +294,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s19
 ; GFX9-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX9-NEXT:  .LBB5_2: ; %if.end
+; GFX9-NEXT:  ; %bb.2: ; %if.end
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -311,7 +304,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1010-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 0, v0
 ; GFX1010-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX1010-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1010-NEXT:  ; %bb.1: ; %if.then
 ; GFX1010-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1010-NEXT:    v_mov_b32_e32 v1, s19
@@ -320,7 +312,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1010-NEXT:    s_mov_b32 s9, s16
 ; GFX1010-NEXT:    s_mov_b32 s8, s7
 ; GFX1010-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1010-NEXT:  .LBB5_2: ; %if.end
+; GFX1010-NEXT:  ; %bb.2: ; %if.end
 ; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1010-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1010-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -331,7 +323,6 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_mov_b32 s4, exec_lo
 ; GFX1030-NEXT:    v_cmpx_lt_i32_e32 0, v0
-; GFX1030-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX1030-NEXT:  ; %bb.1: ; %if.then
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, s19
@@ -340,7 +331,7 @@ define void @divergent_br_profitable(i32 noundef inreg %value, ptr addrspace(8)
 ; GFX1030-NEXT:    s_mov_b32 s9, s16
 ; GFX1030-NEXT:    s_mov_b32 s8, s7
 ; GFX1030-NEXT:    buffer_store_dword v0, v1, s[8:11], 0 offen
-; GFX1030-NEXT:  .LBB5_2: ; %if.end
+; GFX1030-NEXT:  ; %bb.2: ; %if.end
 ; GFX1030-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1030-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1030-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 429e6c489bf6f8..37443760f1bceb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -27,13 +27,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7-NEXT:    ; implicit-def: $vgpr1
 ; GFX7-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX7-NEXT:  ; %bb.2:
 ; GFX7-NEXT:    s_bcnt1_i32_b64 s12, s[12:13]
 ; GFX7-NEXT:    s_mul_i32 s12, s12, 5
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s12
 ; GFX7-NEXT:    buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX7-NEXT:  .LBB0_3:
+; GFX7-NEXT:  ; %bb.3:
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
 ; GFX7-NEXT:    v_readfirstlane_b32 s4, v1
@@ -62,13 +61,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX89-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX89-NEXT:    ; implicit-def: $vgpr1
 ; GFX89-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX89-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX89-NEXT:  ; %bb.2:
 ; GFX89-NEXT:    s_bcnt1_i32_b64 s12, s[12:13]
 ; GFX89-NEXT:    s_mul_i32 s12, s12, 5
 ; GFX89-NEXT:    v_mov_b32_e32 v1, s12
 ; GFX89-NEXT:    buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX89-NEXT:  .LBB0_3:
+; GFX89-NEXT:  ; %bb.3:
 ; GFX89-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX89-NEXT:    s_waitcnt vmcnt(0)
 ; GFX89-NEXT:    v_readfirstlane_b32 s4, v1
@@ -97,13 +95,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX1064-NEXT:    v_mbcnt_hi_u32_b32 v0, s13, v0
 ; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1064-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX1064-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX1064-NEXT:  ; %bb.2:
 ; GFX1064-NEXT:    s_bcnt1_i32_b64 s12, s[12:13]
 ; GFX1064-NEXT:    s_mul_i32 s12, s12, 5
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, s12
 ; GFX1064-NEXT:    buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX1064-NEXT:  .LBB0_3:
+; GFX1064-NEXT:  ; %bb.3:
 ; GFX1064-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
@@ -132,13 +129,12 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX1032-NEXT:    v_mbcnt_lo_u32_b32 v0, s10, 0
 ; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX1032-NEXT:    s_and_saveexec_b32 s9, vcc_lo
-; GFX1032-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX1032-NEXT:  ; %bb.2:
 ; GFX1032-NEXT:    s_bcnt1_i32_b32 s10, s10
 ; GFX1032-NEXT:    s_mul_i32 s10, s10, 5
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, s10
 ; GFX1032-NEXT:    buffer_atomic_add v1, off, s[4:7], 0 glc
-; GFX1032-NEXT:  .LBB0_3:
+; GFX1032-NEXT:  ; %bb.3:
 ; GFX1032-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s9
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
@@ -170,14 +166,13 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX1164-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1164-NEXT:    v_mbcnt_hi_u32_b32 v0, s13, v0
 ; GFX1164-NEXT:    v_cmpx_eq_u32_e32 0, v0
-; GFX1164-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX1164-NEXT:  ; %bb.2:
 ; GFX1164-NEXT:    s_bcnt1_i32_b64 s12, s[12:13]
 ; GFX1164-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1164-NEXT:    s_mul_i32 s12, s12, 5
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, s12
 ; GFX1164-NEXT:    buffer_atomic_add_u32 v1, off, s[4:7], 0 glc
-; GFX1164-NEXT:  .LBB0_3:
+; GFX1164-NEXT:  ; %bb.3:
 ; GFX1164-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX1164-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1164-NEXT:    v_readfirstlane_b32 s4, v1
@@ -211,14 +206,13 @@ define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspa
 ; GFX1132-NEXT:    ; implicit-def: $vgpr1
 ; GFX1132-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132-NEXT:    v_cmpx_eq_u32_e32 0, v0
-; GFX1132-NEXT:    s_cbranch_execz .LBB0_3
 ; GFX1132-NEXT:  ; %bb.2:
 ; GFX1132-NEXT:    s_bcnt1_i32_b32 s10, s10
 ; GFX1132-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX1132-NEXT:    s_mul_i32 s10, s10, 5
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, s10
 ; GFX1132-NEXT:    buffer_atomic_add_u32 v1, off, s[4:7], 0 glc
-; GFX1132-NEXT:  .LBB0_3:
+; GFX1132-NEXT:  ; %bb.3:
 ; GFX1132-NEXT:    s_or_b32 exec_lo, exec_lo, s9
 ; GFX1132-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1132-NEXT:    v_readfirstlane_b32 s4, v1
@@ -298,11 +292,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; GFX8-NEXT:    ; implicit-def: $vgpr0
 ; GFX8-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX8-NEXT:  ; %bb.2:
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX8-NEXT:    buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX8-NEXT:  .LBB1_3:
+; GFX8-NEXT:  ; %bb.3:
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_readfirstlane_b32 s4, v0
@@ -353,11 +346,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v3
 ; GFX9-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX9-NEXT:  ; %bb.2:
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX9-NEXT:    buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX9-NEXT:  .LBB1_3:
+; GFX9-NEXT:  ; %bb.3:
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
@@ -412,11 +404,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1064-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1064-NEXT:    ; implicit-def: $vgpr0
 ; GFX1064-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX1064-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX1064-NEXT:  ; %bb.2:
 ; GFX1064-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX1064-NEXT:    buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX1064-NEXT:  .LBB1_3:
+; GFX1064-NEXT:  ; %bb.3:
 ; GFX1064-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX1064-NEXT:    s_waitcnt vmcnt(0)
@@ -462,11 +453,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1032-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX1032-NEXT:    ; implicit-def: $vgpr0
 ; GFX1032-NEXT:    s_and_saveexec_b32 s9, vcc_lo
-; GFX1032-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX1032-NEXT:  ; %bb.2:
 ; GFX1032-NEXT:    v_mov_b32_e32 v0, s11
 ; GFX1032-NEXT:    buffer_atomic_add v0, off, s[4:7], 0 glc
-; GFX1032-NEXT:  .LBB1_3:
+; GFX1032-NEXT:  ; %bb.3:
 ; GFX1032-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s9
 ; GFX1032-NEXT:    s_waitcnt vmcnt(0)
@@ -531,11 +521,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1164-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX1164-NEXT:    ; implicit-def: $vgpr0
 ; GFX1164-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GFX1164-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX1164-NEXT:  ; %bb.2:
 ; GFX1164-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX1164-NEXT:    buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
-; GFX1164-NEXT:  .LBB1_3:
+; GFX1164-NEXT:  ; %bb.3:
 ; GFX1164-NEXT:    s_or_b64 exec, exec, s[10:11]
 ; GFX1164-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1164-NEXT:    v_readfirstlane_b32 s4, v0
@@ -591,11 +580,10 @@ define amdgpu_ps void @add_i32_varying(ptr addrspace(8) inreg %out, ptr addrspac
 ; GFX1132-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX1132-NEXT:    ; implicit-def: $vgpr0
 ; GFX1132-NEXT:    s_and_saveexec_b32 s9, vcc_lo
-; GFX1132-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX1132-NEXT:  ; %bb.2:
 ; GFX1132-NEXT:    v_mov_b32_e32 v0, s11
 ; GFX1132-NEXT:    buffer_atomic_add_u32 v0, off, s[4:7], 0 glc
-; GFX1132-NEXT:  .LBB1_3:
+; GFX1132-NEXT:  ; %bb.3:
 ; GFX1132-NEXT:    s_or_b32 exec_lo, exec_lo, s9
 ; GFX1132-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1132-NEXT:    v_readfirstlane_b32 s4, v0
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index 4f0bc512565d13..a93104d978003c 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -153,12 +153,11 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
 ; GFX90A-NEXT:    ; implicit-def: $vgpr3
 ; GFX90A-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GFX90A-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; GFX90A-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.global
 ; GFX90A-NEXT:    global_atomic_add_f32 v3, v[0:1], v2, off glc
 ; GFX90A-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX90A-NEXT:    ; implicit-def: $vgpr2
-; GFX90A-NEXT:  .LBB1_3: ; %Flow
+; GFX90A-NEXT:  ; %bb.3: ; %Flow
 ; GFX90A-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GFX90A-NEXT:    s_cbranch_execz .LBB1_5
 ; GFX90A-NEXT:  ; %bb.4: ; %atomicrmw.private
@@ -241,12 +240,11 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
 ; GFX908-NEXT:    v_cmp_ne_u32_e32 vcc, s7, v1
 ; GFX908-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GFX908-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; GFX908-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX908-NEXT:  ; %bb.4: ; %atomicrmw.global
 ; GFX908-NEXT:    global_atomic_add_f32 v[0:1], v2, off
 ; GFX908-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX908-NEXT:    ; implicit-def: $vgpr2
-; GFX908-NEXT:  .LBB2_5: ; %Flow
+; GFX908-NEXT:  ; %bb.5: ; %Flow
 ; GFX908-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GFX908-NEXT:    s_cbranch_execz .LBB2_7
 ; GFX908-NEXT:  ; %bb.6: ; %atomicrmw.private
@@ -291,12 +289,11 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
 ; GFX90A-NEXT:    v_cmp_ne_u32_e32 vcc, s7, v1
 ; GFX90A-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; GFX90A-NEXT:    s_xor_b64 s[6:7], exec, s[6:7]
-; GFX90A-NEXT:    s_cbranch_execz .LBB2_5
 ; GFX90A-NEXT:  ; %bb.4: ; %atomicrmw.global
 ; GFX90A-NEXT:    global_atomic_add_f32 v[0:1], v2, off
 ; GFX90A-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GFX90A-NEXT:    ; implicit-def: $vgpr2
-; GFX90A-NEXT:  .LBB2_5: ; %Flow
+; GFX90A-NEXT:  ; %bb.5: ; %Flow
 ; GFX90A-NEXT:    s_andn2_saveexec_b64 s[6:7], s[6:7]
 ; GFX90A-NEXT:    s_cbranch_execz .LBB2_7
 ; GFX90A-NEXT:  ; %bb.6: ; %atomicrmw.private
diff --git a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
index cc05129b1b2af6..bb8a69d804b07d 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -14,12 +14,11 @@
 ; GCN-DAG: v_cmp_lt_f32_e32 vcc,
 ; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
 ; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
 
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
 ; GCN: ds_write_b32
 
-; GCN: .LBB0_{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN: %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
 ; GCN-NEXT: s_endpgm
 ; GCN-NEXT: .Lfunc_end
 define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index fdae1696a5a492..ba3d6a498c8328 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -76,12 +76,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB0_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 28, v2
 ; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX7-NEXT:    flat_load_dword v4, v[2:3]
-; GFX7-NEXT:  .LBB0_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -96,12 +95,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB0_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX8-NEXT:    flat_load_dword v4, v[2:3]
-; GFX8-NEXT:  .LBB0_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -116,10 +114,9 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB0_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    flat_load_dword v4, v[2:3] offset:28
-; GFX9-NEXT:  .LBB0_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -134,10 +131,9 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB0_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    flat_load_dword v4, v[2:3] offset:28
-; GFX10-NEXT:  .LBB0_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -232,13 +228,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
 ; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
-; GFX7-NEXT:  .LBB1_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -253,12 +248,11 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX8-NEXT:    flat_load_dword v4, v[2:3]
-; GFX8-NEXT:  .LBB1_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -273,10 +267,9 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
-; GFX9-NEXT:  .LBB1_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -291,10 +284,9 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
-; GFX10-NEXT:  .LBB1_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -345,13 +337,12 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_mov_b32 s4, s6
 ; GFX7-NEXT:    s_mov_b32 s5, s6
 ; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
-; GFX7-NEXT:  .LBB2_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -366,12 +357,11 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX8-NEXT:    flat_load_dword v4, v[2:3]
-; GFX8-NEXT:  .LBB2_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -386,10 +376,9 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
-; GFX9-NEXT:  .LBB2_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -404,10 +393,9 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
-; GFX10-NEXT:  .LBB2_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -513,12 +501,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0xfff, v2
 ; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX7-NEXT:  .LBB3_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -534,12 +521,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xfff, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX8-NEXT:  .LBB3_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -555,10 +541,9 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    flat_load_sbyte v4, v[2:3] offset:4095
-; GFX9-NEXT:  .LBB3_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -574,12 +559,11 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x800, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    flat_load_sbyte v4, v[2:3] offset:2047
-; GFX10-NEXT:  .LBB3_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -634,12 +618,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX7-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v2
 ; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX7-NEXT:  .LBB4_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x61a7c, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -655,12 +638,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x1000, v2
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX8-NEXT:  .LBB4_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x61a7c, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -676,12 +658,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0x1000, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX9-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX9-NEXT:  .LBB4_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x61000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -697,12 +678,11 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x1000, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    flat_load_sbyte v4, v[2:3]
-; GFX10-NEXT:  .LBB4_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x61800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
@@ -757,12 +737,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX7-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
 ; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX7-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX7-NEXT:  ; %bb.1: ; %if
 ; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
 ; GFX7-NEXT:    flat_load_sbyte v6, v[2:3]
-; GFX7-NEXT:  .LBB5_2: ; %endif
+; GFX7-NEXT:  ; %bb.2: ; %endif
 ; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
 ; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -778,12 +757,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX8-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX8-NEXT:  ; %bb.1: ; %if
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
 ; GFX8-NEXT:    flat_load_sbyte v6, v[2:3]
-; GFX8-NEXT:  .LBB5_2: ; %endif
+; GFX8-NEXT:  ; %bb.2: ; %endif
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
@@ -799,12 +777,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX9-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
 ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
-; GFX9-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX9-NEXT:  ; %bb.1: ; %if
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
 ; GFX9-NEXT:    flat_load_sbyte v6, v[2:3]
-; GFX9-NEXT:  .LBB5_2: ; %endif
+; GFX9-NEXT:  ; %bb.2: ; %endif
 ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -820,12 +797,11 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
 ; GFX10-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX10-NEXT:  ; %bb.1: ; %if
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v4
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
 ; GFX10-NEXT:    flat_load_sbyte v6, v[2:3]
-; GFX10-NEXT:  .LBB5_2: ; %endif
+; GFX10-NEXT:  ; %bb.2: ; %endif
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 67a084068941a4..840679be252aa5 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -192,13 +192,12 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    buffer_store_dword v4, v[3:4], s[0:3], 0 addr64
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB1_3
 ; GCN-NEXT:  ; %bb.2: ; %bb.inner.then
 ; GCN-NEXT:    s_mov_b32 s0, s2
 ; GCN-NEXT:    s_mov_b32 s1, s2
 ; GCN-NEXT:    v_mov_b32_e32 v0, 1
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:4
-; GCN-NEXT:  .LBB1_3: ; %bb.inner.end
+; GCN-NEXT:  ; %bb.3: ; %bb.inner.end
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_mov_b32 s0, s2
 ; GCN-NEXT:    s_mov_b32 s1, s2
@@ -397,7 +396,6 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GCN-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GCN-NEXT:    s_cbranch_execz .LBB2_3
 ; GCN-NEXT:  ; %bb.2: ; %bb.else
 ; GCN-NEXT:    s_mov_b32 s6, 0
 ; GCN-NEXT:    s_mov_b32 s7, 0xf000
@@ -406,7 +404,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_mov_b32_e32 v0, 2
 ; GCN-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 offset:8
 ; GCN-NEXT:    ; implicit-def: $vgpr3_vgpr4
-; GCN-NEXT:  .LBB2_3: ; %Flow
+; GCN-NEXT:  ; %bb.3: ; %Flow
 ; GCN-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
 ; GCN-NEXT:    s_cbranch_execz .LBB2_5
 ; GCN-NEXT:  ; %bb.4: ; %bb.then
@@ -635,7 +633,6 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GCN-NEXT:    buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:12
 ; GCN-NEXT:    s_and_saveexec_b64 s[0:1], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB3_3
 ; GCN-NEXT:  ; %bb.2: ; %bb.inner.then2
 ; GCN-NEXT:    s_mov_b32 s10, 0
 ; GCN-NEXT:    s_mov_b32 s11, 0xf000
@@ -643,7 +640,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    s_mov_b32 s9, s10
 ; GCN-NEXT:    v_mov_b32_e32 v0, 4
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[8:11], 0 addr64 offset:16
-; GCN-NEXT:  .LBB3_3: ; %Flow
+; GCN-NEXT:  ; %bb.3: ; %Flow
 ; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
 ; GCN-NEXT:    ; implicit-def: $vgpr0
@@ -658,11 +655,10 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
 ; GCN-NEXT:    buffer_store_dword v3, v[1:2], s[0:3], 0 addr64 offset:4
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB3_7
 ; GCN-NEXT:  ; %bb.6: ; %bb.inner.then
 ; GCN-NEXT:    v_mov_b32_e32 v0, 2
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
-; GCN-NEXT:  .LBB3_7: ; %Flow1
+; GCN-NEXT:  ; %bb.7: ; %Flow1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:  .LBB3_8: ; %bb.outer.end
 ; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
index b4ed3cafbacb5f..4d8758e87021f7 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
@@ -156,9 +156,7 @@ name: skip_bvh
 body: |
   ; CHECK-LABEL: name: skip_bvh
   ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index 7371d498a70706..6cc2393d598e04 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -539,11 +539,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -611,11 +610,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -681,11 +679,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -751,11 +748,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[2:3], 0x2c
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
@@ -823,11 +819,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s4, s[2:3], 0x2c
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -903,11 +898,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[2:3], 0x2c
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 60af21524a04a1..f7f8536219db6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -540,11 +540,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX8DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX8DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX8DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -612,11 +611,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX9DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX9DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX9DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -682,11 +680,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr4
 ; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1064DAGISEL-NEXT:    s_load_dword s4, s[2:3], 0x2c
 ; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1064DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1064DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -752,11 +749,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
 ; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[2:3], 0x2c
 ; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1032DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1032DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
 ; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
@@ -824,11 +820,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1164DAGISEL-NEXT:    s_load_b32 s4, s[2:3], 0x2c
 ; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1164DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1164DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s4
@@ -904,11 +899,10 @@ define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
 ; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
 ; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
 ; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[2:3], 0x2c
 ; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
-; GFX1132DAGISEL-NEXT:  .LBB4_2: ; %Flow
+; GFX1132DAGISEL-NEXT:  ; %bb.2: ; %Flow
 ; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
index 5fb50d7e8589a7..9765d35dc50d83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -178,10 +178,9 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; CHECK-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; CHECK-NEXT:    s_cbranch_execz .LBB6_2
 ; CHECK-NEXT:  ; %bb.1: ; %ELSE
 ; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 idxen
-; CHECK-NEXT:  .LBB6_2: ; %Flow
+; CHECK-NEXT:  ; %bb.2: ; %Flow
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
 ; CHECK-NEXT:    s_cbranch_execz .LBB6_4
 ; CHECK-NEXT:  ; %bb.3: ; %IF
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
index 295ae94902da73..569a0f52df738f 100644
--- a/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll
@@ -7923,7 +7923,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    s_add_co_i32 s1, s5, 4
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX12-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX12-NEXT:  ; %bb.1:
 ; GFX12-NEXT:    s_wait_alu 0xfffe
 ; GFX12-NEXT:    s_bcnt1_i32_b32 s5, s6
@@ -7933,7 +7932,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX12-NEXT:    s_wait_alu 0xfffe
 ; GFX12-NEXT:    v_dual_mov_b32 v2, s5 :: v_dual_mul_f32 v1, 0x42280000, v1
 ; GFX12-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX12-NEXT:  .LBB29_2:
+; GFX12-NEXT:  ; %bb.2:
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_mov_b32 s7, exec_lo
@@ -7944,7 +7943,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX12-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX12-NEXT:    v_cmpx_eq_u32_e32 0, v2
-; GFX12-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX12-NEXT:  ; %bb.3:
 ; GFX12-NEXT:    s_bcnt1_i32_b32 s0, s7
 ; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
@@ -7953,8 +7951,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX12-NEXT:    s_wait_alu 0xfffe
 ; GFX12-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mul_f32 v1, 0x42280000, v1
 ; GFX12-NEXT:    ds_add_f32 v2, v1
-; GFX12-NEXT:  .LBB29_4:
-; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:  ; %bb.4:
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s6
 ; GFX12-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX12-NEXT:    s_mov_b32 s1, exec_lo
@@ -7988,11 +7985,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX12-NEXT:    s_and_saveexec_b32 s1, vcc_lo
 ; GFX12-NEXT:    s_wait_alu 0xfffe
 ; GFX12-NEXT:    s_xor_b32 s1, exec_lo, s1
-; GFX12-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX12-NEXT:  ; %bb.7:
 ; GFX12-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_mov_b32 v2, s0
 ; GFX12-NEXT:    ds_add_rtn_f32 v1, v1, v2
-; GFX12-NEXT:  .LBB29_8:
+; GFX12-NEXT:  ; %bb.8:
 ; GFX12-NEXT:    s_wait_alu 0xfffe
 ; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
 ; GFX12-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
@@ -8018,7 +8014,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX940-NEXT:    s_add_i32 s5, s5, 4
 ; GFX940-NEXT:    ; implicit-def: $vgpr1
 ; GFX940-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GFX940-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX940-NEXT:  ; %bb.1:
 ; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
 ; GFX940-NEXT:    s_lshl_b32 s8, s5, 3
@@ -8026,7 +8021,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX940-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX940-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX940-NEXT:  .LBB29_2:
+; GFX940-NEXT:  ; %bb.2:
 ; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX940-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8035,7 +8030,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX940-NEXT:    v_mbcnt_hi_u32_b32 v1, s9, v1
 ; GFX940-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v1
 ; GFX940-NEXT:    s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX940-NEXT:  ; %bb.3:
 ; GFX940-NEXT:    s_bcnt1_i32_b64 s0, s[8:9]
 ; GFX940-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
@@ -8043,7 +8037,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX940-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX940-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX940-NEXT:    ds_add_f32 v2, v1
-; GFX940-NEXT:  .LBB29_4:
+; GFX940-NEXT:  ; %bb.4:
 ; GFX940-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX940-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX940-NEXT:    v_mul_f32_e32 v0, 0x42280000, v0
@@ -8072,11 +8066,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX940-NEXT:    ; implicit-def: $vgpr2
 ; GFX940-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX940-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX940-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX940-NEXT:  ; %bb.7:
 ; GFX940-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX940-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX940-NEXT:  .LBB29_8:
+; GFX940-NEXT:  ; %bb.8:
 ; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8100,7 +8093,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_add_i32 s1, s5, 4
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX11-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX11-NEXT:  ; %bb.1:
 ; GFX11-NEXT:    s_bcnt1_i32_b32 s5, s6
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -8109,7 +8101,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s5 :: v_dual_mul_f32 v1, 0x42280000, v1
 ; GFX11-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX11-NEXT:  .LBB29_2:
+; GFX11-NEXT:  ; %bb.2:
 ; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    s_mov_b32 s7, exec_lo
@@ -8118,7 +8110,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX11-NEXT:    v_mbcnt_lo_u32_b32 v2, s7, 0
 ; GFX11-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v2
-; GFX11-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX11-NEXT:  ; %bb.3:
 ; GFX11-NEXT:    s_bcnt1_i32_b32 s0, s7
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -8127,7 +8118,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mul_f32 v1, 0x42280000, v1
 ; GFX11-NEXT:    ds_add_f32 v2, v1
-; GFX11-NEXT:  .LBB29_4:
+; GFX11-NEXT:  ; %bb.4:
 ; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s6
 ; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX11-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -8159,11 +8150,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX11-NEXT:    ; implicit-def: $vgpr2
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX11-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX11-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX11-NEXT:  ; %bb.7:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX11-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX11-NEXT:  .LBB29_8:
+; GFX11-NEXT:  ; %bb.8:
 ; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8186,7 +8176,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_add_i32 s1, s5, 4
 ; GFX10-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GFX10-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX10-NEXT:  ; %bb.1:
 ; GFX10-NEXT:    s_bcnt1_i32_b32 s5, s6
 ; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v1, s5
@@ -8194,7 +8183,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s5
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX10-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX10-NEXT:  .LBB29_2:
+; GFX10-NEXT:  ; %bb.2:
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX10-NEXT:    s_mov_b32 s7, exec_lo
@@ -8203,7 +8192,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v2, s7, 0
 ; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, 0, v2
 ; GFX10-NEXT:    s_and_saveexec_b32 s6, s0
-; GFX10-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX10-NEXT:  ; %bb.3:
 ; GFX10-NEXT:    s_bcnt1_i32_b32 s0, s7
 ; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
@@ -8211,7 +8199,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX10-NEXT:    ds_add_f32 v2, v1
-; GFX10-NEXT:  .LBB29_4:
+; GFX10-NEXT:  ; %bb.4:
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s6
 ; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
@@ -8238,11 +8226,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    ; implicit-def: $vgpr2
 ; GFX10-NEXT:    s_and_saveexec_b32 s0, vcc_lo
 ; GFX10-NEXT:    s_xor_b32 s0, exec_lo, s0
-; GFX10-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX10-NEXT:  ; %bb.7:
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX10-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX10-NEXT:  .LBB29_8:
+; GFX10-NEXT:  ; %bb.8:
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
@@ -8251,7 +8238,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    v_add_f32_e32 v0, s2, v0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, s2, vcc_lo
-; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
@@ -8266,7 +8252,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX90A-NEXT:    s_add_i32 s5, s5, 4
 ; GFX90A-NEXT:    ; implicit-def: $vgpr1
 ; GFX90A-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GFX90A-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX90A-NEXT:  ; %bb.1:
 ; GFX90A-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
 ; GFX90A-NEXT:    s_lshl_b32 s8, s5, 3
@@ -8274,7 +8259,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX90A-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX90A-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX90A-NEXT:  .LBB29_2:
+; GFX90A-NEXT:  ; %bb.2:
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX90A-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8283,7 +8268,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX90A-NEXT:    v_mbcnt_hi_u32_b32 v1, s9, v1
 ; GFX90A-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v1
 ; GFX90A-NEXT:    s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX90A-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX90A-NEXT:  ; %bb.3:
 ; GFX90A-NEXT:    s_bcnt1_i32_b64 s0, s[8:9]
 ; GFX90A-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
@@ -8291,7 +8275,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX90A-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX90A-NEXT:    ds_add_f32 v2, v1
-; GFX90A-NEXT:  .LBB29_4:
+; GFX90A-NEXT:  ; %bb.4:
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX90A-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX90A-NEXT:    v_mul_f32_e32 v0, 0x42280000, v0
@@ -8320,11 +8304,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX90A-NEXT:    ; implicit-def: $vgpr2
 ; GFX90A-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX90A-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX90A-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX90A-NEXT:  ; %bb.7:
 ; GFX90A-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX90A-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX90A-NEXT:  .LBB29_8:
+; GFX90A-NEXT:  ; %bb.8:
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8347,7 +8330,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX908-NEXT:    s_add_i32 s5, s5, 4
 ; GFX908-NEXT:    ; implicit-def: $vgpr1
 ; GFX908-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GFX908-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX908-NEXT:  ; %bb.1:
 ; GFX908-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
 ; GFX908-NEXT:    s_lshl_b32 s8, s5, 3
@@ -8355,7 +8337,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX908-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX908-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX908-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX908-NEXT:  .LBB29_2:
+; GFX908-NEXT:  ; %bb.2:
 ; GFX908-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX908-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8364,7 +8346,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX908-NEXT:    v_mbcnt_hi_u32_b32 v1, s9, v1
 ; GFX908-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v1
 ; GFX908-NEXT:    s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX908-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX908-NEXT:  ; %bb.3:
 ; GFX908-NEXT:    s_bcnt1_i32_b64 s0, s[8:9]
 ; GFX908-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
@@ -8372,7 +8353,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX908-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX908-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX908-NEXT:    ds_add_f32 v2, v1
-; GFX908-NEXT:  .LBB29_4:
+; GFX908-NEXT:  ; %bb.4:
 ; GFX908-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX908-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX908-NEXT:    v_mul_f32_e32 v0, 0x42280000, v0
@@ -8401,11 +8382,10 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX908-NEXT:    ; implicit-def: $vgpr2
 ; GFX908-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX908-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX908-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX908-NEXT:  ; %bb.7:
 ; GFX908-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX908-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX908-NEXT:  .LBB29_8:
+; GFX908-NEXT:  ; %bb.8:
 ; GFX908-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8429,7 +8409,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX8-NEXT:    ; implicit-def: $vgpr1
 ; GFX8-NEXT:    s_mov_b32 m0, -1
 ; GFX8-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GFX8-NEXT:    s_cbranch_execz .LBB29_2
 ; GFX8-NEXT:  ; %bb.1:
 ; GFX8-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
 ; GFX8-NEXT:    s_lshl_b32 s8, s5, 3
@@ -8437,7 +8416,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX8-NEXT:    ds_add_rtn_f32 v1, v2, v1
-; GFX8-NEXT:  .LBB29_2:
+; GFX8-NEXT:  ; %bb.2:
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX8-NEXT:    s_mov_b64 s[8:9], exec
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8446,7 +8425,6 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX8-NEXT:    v_mbcnt_hi_u32_b32 v1, s9, v1
 ; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v1
 ; GFX8-NEXT:    s_and_saveexec_b64 s[6:7], s[0:1]
-; GFX8-NEXT:    s_cbranch_execz .LBB29_4
 ; GFX8-NEXT:  ; %bb.3:
 ; GFX8-NEXT:    s_bcnt1_i32_b64 s0, s[8:9]
 ; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v1, s0
@@ -8454,7 +8432,7 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x42280000, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8-NEXT:    ds_add_f32 v2, v1
-; GFX8-NEXT:  .LBB29_4:
+; GFX8-NEXT:  ; %bb.4:
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GFX8-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x42280000, v0
@@ -8483,12 +8461,11 @@ define amdgpu_kernel void @local_ds_fadd_one_as(ptr addrspace(1) %out, ptr addrs
 ; GFX8-NEXT:    ; implicit-def: $vgpr2
 ; GFX8-NEXT:    s_and_saveexec_b64 s[0:1], vcc
 ; GFX8-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
-; GFX8-NEXT:    s_cbranch_execz .LBB29_8
 ; GFX8-NEXT:  ; %bb.7:
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX8-NEXT:    s_mov_b32 m0, -1
 ; GFX8-NEXT:    ds_add_rtn_f32 v2, v2, v1
-; GFX8-NEXT:  .LBB29_8:
+; GFX8-NEXT:  ; %bb.8:
 ; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ad38d78ddb2ff1..66a55d9eb128c6 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -65,7 +65,6 @@ ret.bb:                                          ; preds = %else, %main_body
 
 ; GCN: .LBB{{[0-9]+_[0-9]+}}: ; %else
 ; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
 
 ; GCN-NEXT:  ; %unreachable.bb
 ; GCN: ds_write_b32
diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
index 6f841c88a6d8bb..8b65aeb5836913 100644
--- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
@@ -13,7 +13,6 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GCN-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 3, v0
 ; GCN-NEXT:    s_and_saveexec_b32 s0, vcc_lo
-; GCN-NEXT:    s_cbranch_execz .LBB0_4
 ; GCN-NEXT:  ; %bb.3: ; %.then
 ; GCN-NEXT:    s_or_saveexec_b32 s1, -1
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, v3, s1
@@ -24,7 +23,7 @@ define amdgpu_cs void @if_then(ptr addrspace(8) inreg %input, ptr addrspace(8) i
 ; GCN-NEXT:    v_mov_b32_e32 v4, -1
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    buffer_store_dword v4, v0, s[4:7], 0 offen
-; GCN-NEXT:  .LBB0_4: ; %.end
+; GCN-NEXT:  ; %bb.4: ; %.end
 ; GCN-NEXT:    s_waitcnt_depctr 0xffe3
 ; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GCN-NEXT:    v_mov_b32_e32 v0, -1
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 13745d4d5b171d..aec0189c0581ae 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -3,13 +3,12 @@
 ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
 ; GCN: v_cmp_eq_u32
 ; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB0_{{[0-9]+}}
 
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
 ; GCN: ds_write_b32
 ; GCN: ; divergent unreachable
 
-; GCN-NEXT: BB0_{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN-NEXT: %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
 ; GCN: s_endpgm
 
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
@@ -29,13 +28,12 @@ ret:
 ; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
 ; GCN: v_cmp_ne_u32
 ; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz .LBB1_{{[0-9]+}}
 
 ; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
 ; GCN: ds_write_b32
 ; GCN: ; divergent unreachable
 
-; GCN: BB1_{{[0-9]+}}:
+; GCN: %bb.{{[0-9]+}}:
 ; GCN-NEXT: s_endpgm
 define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index a5e1506114f2d0..ec48d4620849be 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -77,13 +77,12 @@ define hidden void @widget() {
 ; GCN-NEXT:  .LBB0_4: ; %Flow2
 ; GCN-NEXT:    s_and_saveexec_b64 s[18:19], s[46:47]
 ; GCN-NEXT:    s_xor_b64 s[18:19], exec, s[18:19]
-; GCN-NEXT:    s_cbranch_execz .LBB0_6
 ; GCN-NEXT:  ; %bb.5: ; %bb12
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    flat_store_dword v[0:1], v2
-; GCN-NEXT:  .LBB0_6: ; %Flow3
+; GCN-NEXT:  ; %bb.6: ; %Flow3
 ; GCN-NEXT:    s_or_b64 exec, exec, s[18:19]
 ; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB0_8
@@ -359,23 +358,21 @@ define hidden void @blam() {
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    s_mov_b64 s[8:9], s[52:53]
 ; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[42:43]
-; GCN-NEXT:    s_cbranch_execz .LBB1_7
 ; GCN-NEXT:  ; %bb.6: ; %bb16
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], 0
 ; GCN-NEXT:    s_or_b64 s[8:9], s[52:53], exec
-; GCN-NEXT:  .LBB1_7: ; %Flow3
+; GCN-NEXT:  ; %bb.7: ; %Flow3
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GCN-NEXT:    s_mov_b64 s[6:7], 0
 ; GCN-NEXT:    s_and_saveexec_b64 s[10:11], s[8:9]
 ; GCN-NEXT:    s_xor_b64 s[8:9], exec, s[10:11]
-; GCN-NEXT:    s_cbranch_execz .LBB1_9
 ; GCN-NEXT:  ; %bb.8: ; %bb17
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    s_mov_b64 s[6:7], exec
 ; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], 0
-; GCN-NEXT:  .LBB1_9: ; %Flow4
+; GCN-NEXT:  ; %bb.9: ; %Flow4
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    s_and_b64 s[6:7], s[6:7], exec
@@ -401,12 +398,11 @@ define hidden void @blam() {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
 ; GCN-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB1_15
 ; GCN-NEXT:  ; %bb.14: ; %bb10
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], 0
 ; GCN-NEXT:    s_or_b64 s[10:11], s[6:7], exec
-; GCN-NEXT:  .LBB1_15: ; %Flow6
+; GCN-NEXT:  ; %bb.15: ; %Flow6
 ; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
 ; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index 2d5e5a9160fdf7..0c1852f4042542 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -60,10 +60,9 @@ define amdgpu_kernel void @v4i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dword v2, v3, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB1_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dword v2, v3, s[6:7]
-; GFX906-NEXT:  .LBB1_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    global_store_dword v1, v2, s[0:1]
@@ -136,10 +135,9 @@ define amdgpu_kernel void @v8i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1)
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v4, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB3_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v4, s[6:7]
-; GFX906-NEXT:  .LBB3_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    global_store_dwordx2 v3, v[1:2], s[0:1]
@@ -172,10 +170,9 @@ define amdgpu_kernel void @v16i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v6, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB4_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v6, s[6:7]
-; GFX906-NEXT:  .LBB4_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_waitcnt vmcnt(0)
 ; GFX906-NEXT:    global_store_dwordx4 v5, v[1:4], s[0:1]
@@ -209,11 +206,10 @@ define amdgpu_kernel void @v32i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v10, s[4:5] offset:16
 ; GFX906-NEXT:    global_load_dwordx4 v[5:8], v10, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB5_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx4 v[1:4], v10, s[6:7] offset:16
 ; GFX906-NEXT:    global_load_dwordx4 v[5:8], v10, s[6:7]
-; GFX906-NEXT:  .LBB5_2: ; %bb.2
+; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_waitcnt vmcnt(1)
 ; GFX906-NEXT:    global_store_dwordx4 v9, v[1:4], s[0:1] offset:16
@@ -412,14 +408,13 @@ define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(
 ; GFX906-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[4:5]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB8_2
 ; GFX906-NEXT:  ; %bb.1: ; %bb.1
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v3, s[6:7]
 ; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
 ; GFX906-NEXT:    s_and_b64 s[4:5], vcc, exec
 ; GFX906-NEXT:    s_or_b64 s[0:1], s[0:1], s[4:5]
-; GFX906-NEXT:  .LBB8_2: ; %Flow
+; GFX906-NEXT:  ; %bb.2: ; %Flow
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], s[0:1]
 ; GFX906-NEXT:    s_cbranch_execz .LBB8_4
@@ -646,11 +641,10 @@ define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspac
 ; GFX906-NEXT:    global_load_dwordx2 v[1:2], v6, s[6:7]
 ; GFX906-NEXT:    v_cmp_gt_u32_e32 vcc, 7, v0
 ; GFX906-NEXT:    s_and_saveexec_b64 s[2:3], vcc
-; GFX906-NEXT:    s_cbranch_execz .LBB11_3
 ; GFX906-NEXT:  ; %bb.2: ; %bb.2
 ; GFX906-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX906-NEXT:    global_store_dwordx2 v0, v[3:4], s[8:9]
-; GFX906-NEXT:  .LBB11_3: ; %Flow
+; GFX906-NEXT:  ; %bb.3: ; %Flow
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX906-NEXT:  .LBB11_4: ; %bb.3
 ; GFX906-NEXT:    s_or_b64 exec, exec, s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index ab84c0c905771b..613a742e89b1ab 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1361,13 +1361,12 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; GFX9-W64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
 ; GFX9-W64-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
-; GFX9-W64-NEXT:    s_cbranch_execz .LBB27_2
 ; GFX9-W64-NEXT:  ; %bb.1: ; %ELSE
 ; GFX9-W64-NEXT:    s_and_saveexec_b64 s[16:17], s[12:13]
 ; GFX9-W64-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 idxen
 ; GFX9-W64-NEXT:    ; implicit-def: $vgpr0
 ; GFX9-W64-NEXT:    s_mov_b64 exec, s[16:17]
-; GFX9-W64-NEXT:  .LBB27_2: ; %Flow
+; GFX9-W64-NEXT:  ; %bb.2: ; %Flow
 ; GFX9-W64-NEXT:    s_andn2_saveexec_b64 s[14:15], s[14:15]
 ; GFX9-W64-NEXT:    s_cbranch_execz .LBB27_4
 ; GFX9-W64-NEXT:  ; %bb.3: ; %IF
@@ -1388,13 +1387,12 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr
 ; GFX10-W32-NEXT:    s_mov_b32 s13, exec_lo
 ; GFX10-W32-NEXT:    v_cmpx_ne_u32_e32 0, v1
 ; GFX10-W32-NEXT:    s_xor_b32 s13, exec_lo, s13
-; GFX10-W32-NEXT:    s_cbranch_execz .LBB27_2
 ; GFX10-W32-NEXT:  ; %bb.1: ; %ELSE
 ; GFX10-W32-NEXT:    s_and_saveexec_b32 s14, s12
 ; GFX10-W32-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 idxen
 ; GFX10-W32-NEXT:    ; implicit-def: $vgpr0
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, s14
-; GFX10-W32-NEXT:  .LBB27_2: ; %Flow
+; GFX10-W32-NEXT:  ; %bb.2: ; %Flow
 ; GFX10-W32-NEXT:    s_andn2_saveexec_b32 s13, s13
 ; GFX10-W32-NEXT:    s_cbranch_execz .LBB27_4
 ; GFX10-W32-NEXT:  ; %bb.3: ; %IF
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index d1500e002d7e92..32ffe92b942cea 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -86,19 +86,17 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
 ; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:8
 ; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT:  .LBB0_2: ; %Flow
+; CHECK-NEXT:  ; %bb.2: ; %Flow
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
-; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:  ; %bb.4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_addk_i32 s32, 0xfa00
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index deadc4adb02c5e..77b62e6a7b0cbc 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -27,19 +27,17 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    s_mov_b64 s[4:5], 0
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
 ; CHECK-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_2
 ; CHECK-NEXT:  ; %bb.1:
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
 ; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], s33 offset:8
 ; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:16
-; CHECK-NEXT:  .LBB0_2: ; %Flow
+; CHECK-NEXT:  ; %bb.2: ; %Flow
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
-; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:  ; %bb.4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_addk_i32 s32, 0xfa00



More information about the llvm-commits mailing list