[llvm] MachineBlockPlacement: Add tolerance to comparisons (PR #67197)

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 3 15:26:48 PDT 2023


https://github.com/MatzeB updated https://github.com/llvm/llvm-project/pull/67197

>From 3ebfe645f31287576a0924cd5f31de241763d035 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Fri, 22 Sep 2023 11:00:01 -0700
Subject: [PATCH 1/2] Switch some tests to use update_llc_test_checks.py

---
 .../CodeGen/AMDGPU/optimize-negated-cond.ll   | 123 +++++++++++++++---
 llvm/test/CodeGen/VE/Scalar/br_jt.ll          |  93 ++++++-------
 .../CodeGen/X86/2008-04-17-CoalescerBug.ll    |   1 +
 llvm/test/CodeGen/X86/dup-cost.ll             |  54 ++++++--
 4 files changed, 195 insertions(+), 76 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
index ca51994b92203c3..f284df4d8a70b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
@@ -1,10 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-; GCN-LABEL: {{^}}negated_cond:
-; GCN: .LBB0_2:
-; GCN:   v_cndmask_b32_e64
-; GCN:   v_cmp_ne_u32_e64
 define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
+; GCN-LABEL: negated_cond:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_branch .LBB0_2
+; GCN-NEXT:  .LBB0_1: ; %loop.exit.guard
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
+; GCN-NEXT:    s_and_b64 vcc, exec, s[14:15]
+; GCN-NEXT:    s_cbranch_vccnz .LBB0_9
+; GCN-NEXT:  .LBB0_2: ; %bb1
+; GCN-NEXT:    ; =>This Loop Header: Depth=1
+; GCN-NEXT:    ; Child Loop BB0_4 Depth 2
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    buffer_load_dword v1, off, s[8:11], 0
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT:    s_mov_b32 s12, s6
+; GCN-NEXT:    s_branch .LBB0_4
+; GCN-NEXT:  .LBB0_3: ; %Flow1
+; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
+; GCN-NEXT:    s_cbranch_vccz .LBB0_1
+; GCN-NEXT:  .LBB0_4: ; %bb2
+; GCN-NEXT:    ; Parent Loop BB0_2 Depth=1
+; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
+; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
+; GCN-NEXT:    s_lshl_b32 s12, s12, 5
+; GCN-NEXT:    s_cbranch_vccz .LBB0_6
+; GCN-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_mov_b64 s[14:15], s[2:3]
+; GCN-NEXT:    s_branch .LBB0_7
+; GCN-NEXT:  .LBB0_6: ; %bb3
+; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_add_i32 s12, s12, 1
+; GCN-NEXT:    s_mov_b64 s[14:15], -1
+; GCN-NEXT:  .LBB0_7: ; %Flow
+; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
+; GCN-NEXT:    s_mov_b64 s[16:17], -1
+; GCN-NEXT:    s_cbranch_vccnz .LBB0_3
+; GCN-NEXT:  ; %bb.8: ; %bb4
+; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_ashr_i32 s13, s12, 31
+; GCN-NEXT:    s_lshl_b64 s[16:17], s[12:13], 2
+; GCN-NEXT:    s_mov_b64 s[14:15], 0
+; GCN-NEXT:    v_mov_b32_e32 v1, s16
+; GCN-NEXT:    v_mov_b32_e32 v2, s17
+; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; GCN-NEXT:    s_cmp_eq_u32 s12, 32
+; GCN-NEXT:    s_cselect_b64 s[16:17], -1, 0
+; GCN-NEXT:    s_branch .LBB0_3
+; GCN-NEXT:  .LBB0_9: ; %DummyReturnBlock
+; GCN-NEXT:    s_endpgm
 bb:
   br label %bb1
 
@@ -30,20 +88,51 @@ bb4:
   br i1 %tmp7, label %bb1, label %bb2
 }
 
-; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
-; GCN:   s_cmp_lg_u32
-; GCN:   s_cselect_b64  [[CC1:[^,]+]], -1, 0
-; GCN:   s_branch [[BB1:.LBB[0-9]+_[0-9]+]]
-; GCN: [[BB0:.LBB[0-9]+_[0-9]+]]
-; GCN-NOT: v_cndmask_b32
-; GCN-NOT: v_cmp
-; GCN: [[BB1]]:
-; GCN:   s_mov_b64 vcc, [[CC1]]
-; GCN:   s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]]
-; GCN:   s_mov_b64 vcc, exec
-; GCN:   s_cbranch_execnz [[BB0]]
-; GCN: [[BB2]]:
 define amdgpu_kernel void @negated_cond_dominated_blocks(ptr addrspace(1) %arg1) {
+; GCN-LABEL: negated_cond_dominated_blocks:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_load_dword s0, s[4:5], 0x0
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lg_u32 s0, 0
+; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; GCN-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    s_mov_b32 s3, s6
+; GCN-NEXT:    s_branch .LBB1_2
+; GCN-NEXT:  .LBB1_1: ; %bb7
+; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; GCN-NEXT:    s_ashr_i32 s3, s2, 31
+; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], 2
+; GCN-NEXT:    v_mov_b32_e32 v1, s8
+; GCN-NEXT:    v_mov_b32_e32 v2, s9
+; GCN-NEXT:    s_cmp_eq_u32 s2, 32
+; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; GCN-NEXT:    s_mov_b32 s3, s2
+; GCN-NEXT:    s_cbranch_scc1 .LBB1_6
+; GCN-NEXT:  .LBB1_2: ; %bb4
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_mov_b64 vcc, s[0:1]
+; GCN-NEXT:    s_cbranch_vccz .LBB1_4
+; GCN-NEXT:  ; %bb.3: ; %bb6
+; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; GCN-NEXT:    s_add_i32 s2, s3, 1
+; GCN-NEXT:    s_mov_b64 vcc, exec
+; GCN-NEXT:    s_cbranch_execnz .LBB1_1
+; GCN-NEXT:    s_branch .LBB1_5
+; GCN-NEXT:  .LBB1_4: ; in Loop: Header=BB1_2 Depth=1
+; GCN-NEXT:    ; implicit-def: $sgpr2
+; GCN-NEXT:    s_mov_b64 vcc, 0
+; GCN-NEXT:  .LBB1_5: ; %bb5
+; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
+; GCN-NEXT:    s_lshl_b32 s2, s3, 5
+; GCN-NEXT:    s_or_b32 s2, s2, 1
+; GCN-NEXT:    s_branch .LBB1_1
+; GCN-NEXT:  .LBB1_6: ; %bb3
+; GCN-NEXT:    s_endpgm
 bb:
   br label %bb2
 
diff --git a/llvm/test/CodeGen/VE/Scalar/br_jt.ll b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
index bc7b26abe7e046f..216d4cca097001c 100644
--- a/llvm/test/CodeGen/VE/Scalar/br_jt.ll
+++ b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc < %s -mtriple=ve | FileCheck %s
 ; RUN: llc < %s -mtriple=ve -relocation-model=pic \
 ; RUN:     | FileCheck %s -check-prefix=PIC
@@ -11,22 +12,22 @@ define signext i32 @br_jt3(i32 signext %0) {
 ; CHECK-LABEL: br_jt3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    breq.w 1, %s0, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:    breq.w 1, %s0, .LBB0_1
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    breq.w 4, %s0, .LBB{{[0-9]+}}_5
+; CHECK-NEXT:    breq.w 4, %s0, .LBB0_5
 ; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    brne.w 2, %s0, .LBB{{[0-9]+}}_6
+; CHECK-NEXT:    brne.w 2, %s0, .LBB0_6
 ; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_1:
+; CHECK-NEXT:  .LBB0_1:
 ; CHECK-NEXT:    or %s0, 3, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
+; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    or %s0, 7, (0)1
-; CHECK-NEXT:  .LBB{{[0-9]+}}_6:
+; CHECK-NEXT:  .LBB0_6:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
@@ -78,7 +79,7 @@ define signext i32 @br_jt4(i32 signext %0) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s1, -1, %s0
 ; CHECK-NEXT:    cmpu.w %s2, 3, %s1
-; CHECK-NEXT:    brgt.w 0, %s2, .LBB{{[0-9]+}}_2
+; CHECK-NEXT:    brgt.w 0, %s2, .LBB1_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 2
@@ -87,7 +88,7 @@ define signext i32 @br_jt4(i32 signext %0) {
 ; CHECK-NEXT:    lea.sl %s1, .Lswitch.table.br_jt4@hi(, %s1)
 ; CHECK-NEXT:    ldl.sx %s0, (%s0, %s1)
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB1_2:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
@@ -138,18 +139,18 @@ define signext i32 @br_jt7(i32 signext %0) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s1, -1, %s0
 ; CHECK-NEXT:    cmpu.w %s2, 8, %s1
-; CHECK-NEXT:    brgt.w 0, %s2, .LBB{{[0-9]+}}_3
+; CHECK-NEXT:    brgt.w 0, %s2, .LBB2_3
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    and %s2, %s1, (48)0
 ; CHECK-NEXT:    lea %s3, 463
 ; CHECK-NEXT:    and %s3, %s3, (32)0
 ; CHECK-NEXT:    srl %s2, %s3, %s2
 ; CHECK-NEXT:    and %s2, 1, %s2
-; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
-; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:    brne.w 0, %s2, .LBB2_2
+; CHECK-NEXT:  .LBB2_3:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB2_2:
 ; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 2
 ; CHECK-NEXT:    lea %s1, .Lswitch.table.br_jt7@lo
@@ -219,18 +220,18 @@ define signext i32 @br_jt8(i32 signext %0) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s1, -1, %s0
 ; CHECK-NEXT:    cmpu.w %s2, 8, %s1
-; CHECK-NEXT:    brgt.w 0, %s2, .LBB{{[0-9]+}}_3
+; CHECK-NEXT:    brgt.w 0, %s2, .LBB3_3
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    and %s2, %s1, (48)0
 ; CHECK-NEXT:    lea %s3, 495
 ; CHECK-NEXT:    and %s3, %s3, (32)0
 ; CHECK-NEXT:    srl %s2, %s3, %s2
 ; CHECK-NEXT:    and %s2, 1, %s2
-; CHECK-NEXT:    brne.w 0, %s2, .LBB{{[0-9]+}}_2
-; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:    brne.w 0, %s2, .LBB3_2
+; CHECK-NEXT:  .LBB3_3:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB3_2:
 ; CHECK-NEXT:    adds.w.sx %s0, %s1, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 2
 ; CHECK-NEXT:    lea %s1, .Lswitch.table.br_jt8@lo
@@ -298,23 +299,23 @@ define signext i32 @br_jt3_m(i32 signext %0, i32 signext %1) {
 ; CHECK-LABEL: br_jt3_m:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    and %s0, %s0, (32)0
-; CHECK-NEXT:    breq.w 1, %s0, .LBB{{[0-9]+}}_1
+; CHECK-NEXT:    breq.w 1, %s0, .LBB4_1
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    breq.w 4, %s0, .LBB{{[0-9]+}}_5
+; CHECK-NEXT:    breq.w 4, %s0, .LBB4_5
 ; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    brne.w 2, %s0, .LBB{{[0-9]+}}_6
+; CHECK-NEXT:    brne.w 2, %s0, .LBB4_6
 ; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_1:
+; CHECK-NEXT:  .LBB4_1:
 ; CHECK-NEXT:    or %s0, 3, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
+; CHECK-NEXT:  .LBB4_5:
 ; CHECK-NEXT:    and %s0, %s1, (32)0
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s0
-; CHECK-NEXT:  .LBB{{[0-9]+}}_6:
+; CHECK-NEXT:  .LBB4_6:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
@@ -368,7 +369,7 @@ define signext i32 @br_jt4_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s2, -1, %s0
 ; CHECK-NEXT:    cmpu.w %s3, 3, %s2
-; CHECK-NEXT:    brgt.w 0, %s3, .LBB{{[0-9]+}}_5
+; CHECK-NEXT:    brgt.w 0, %s3, .LBB5_5
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    adds.w.zx %s0, %s2, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 3
@@ -378,18 +379,18 @@ define signext i32 @br_jt4_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    ld %s2, (%s2, %s0)
 ; CHECK-NEXT:    or %s0, 3, (0)1
 ; CHECK-NEXT:    b.l.t (, %s2)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB5_2:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:  .LBB5_3:
 ; CHECK-NEXT:    or %s0, 4, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:  .LBB5_4:
 ; CHECK-NEXT:    and %s0, %s1, (32)0
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s0
-; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
+; CHECK-NEXT:  .LBB5_5:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
@@ -455,7 +456,7 @@ define signext i32 @br_jt7_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    and %s2, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s0, -1, %s2
 ; CHECK-NEXT:    cmpu.w %s3, 8, %s0
-; CHECK-NEXT:    brgt.w 0, %s3, .LBB{{[0-9]+}}_8
+; CHECK-NEXT:    brgt.w 0, %s3, .LBB6_8
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 3
@@ -466,32 +467,32 @@ define signext i32 @br_jt7_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    or %s0, 3, (0)1
 ; CHECK-NEXT:    b.l.t (, %s3)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB6_2:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:  .LBB6_3:
 ; CHECK-NEXT:    or %s0, 4, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:  .LBB6_4:
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_8:
+; CHECK-NEXT:  .LBB6_8:
 ; CHECK-NEXT:    or %s0, 0, %s2
-; CHECK-NEXT:  .LBB{{[0-9]+}}_9:
+; CHECK-NEXT:  .LBB6_9:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_7:
+; CHECK-NEXT:  .LBB6_7:
 ; CHECK-NEXT:    or %s0, 11, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_6:
+; CHECK-NEXT:  .LBB6_6:
 ; CHECK-NEXT:    or %s0, 10, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
+; CHECK-NEXT:  .LBB6_5:
 ; CHECK-NEXT:    adds.w.sx %s0, -2, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -591,7 +592,7 @@ define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    and %s2, %s0, (32)0
 ; CHECK-NEXT:    adds.w.sx %s0, -1, %s2
 ; CHECK-NEXT:    cmpu.w %s3, 8, %s0
-; CHECK-NEXT:    brgt.w 0, %s3, .LBB{{[0-9]+}}_9
+; CHECK-NEXT:    brgt.w 0, %s3, .LBB7_9
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    adds.w.zx %s0, %s0, (0)1
 ; CHECK-NEXT:    sll %s0, %s0, 3
@@ -602,36 +603,36 @@ define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    and %s1, %s1, (32)0
 ; CHECK-NEXT:    or %s0, 3, (0)1
 ; CHECK-NEXT:    b.l.t (, %s3)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  .LBB7_2:
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_3:
+; CHECK-NEXT:  .LBB7_3:
 ; CHECK-NEXT:    or %s0, 4, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_4:
+; CHECK-NEXT:  .LBB7_4:
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_9:
+; CHECK-NEXT:  .LBB7_9:
 ; CHECK-NEXT:    or %s0, 0, %s2
-; CHECK-NEXT:  .LBB{{[0-9]+}}_10:
+; CHECK-NEXT:  .LBB7_10:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_5:
+; CHECK-NEXT:  .LBB7_5:
 ; CHECK-NEXT:    adds.w.sx %s0, -5, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_6:
+; CHECK-NEXT:  .LBB7_6:
 ; CHECK-NEXT:    adds.w.sx %s0, -2, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_8:
+; CHECK-NEXT:  .LBB7_8:
 ; CHECK-NEXT:    or %s0, 11, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB{{[0-9]+}}_7:
+; CHECK-NEXT:  .LBB7_7:
 ; CHECK-NEXT:    or %s0, 10, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index 6d596195fe7f696..bf939c4131080d3 100644
--- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
 ; Make sure xorl operands are 32-bit registers.
 
diff --git a/llvm/test/CodeGen/X86/dup-cost.ll b/llvm/test/CodeGen/X86/dup-cost.ll
index 523f0f1154e94d3..ec9d36aa2a11b65 100644
--- a/llvm/test/CodeGen/X86/dup-cost.ll
+++ b/llvm/test/CodeGen/X86/dup-cost.ll
@@ -1,14 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
 
 ; Cold function, %dup should not be duplicated into predecessors.
 define i32 @cold(i32 %a, ptr %p, ptr %q) !prof !21 {
-; CHECK-LABEL: cold
-; CHECK:       %entry
-; CHECK:       %true1
-; CHECK:       %dup
-; CHECK:       %true2
-; CHECK:       %false1
-; CHECK:       %false2
+; CHECK-LABEL: cold:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %true1
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    addl $2, %eax
+; CHECK-NEXT:  .LBB0_3: # %dup
+; CHECK-NEXT:    cmpl $5, %eax
+; CHECK-NEXT:    jl .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %true2
+; CHECK-NEXT:    xorl %edi, %eax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_2: # %false1
+; CHECK-NEXT:    movl (%rdx), %eax
+; CHECK-NEXT:    addl $-3, %eax
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_5: # %false2
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond1 = icmp sgt i32 %a, 1
   br i1 %cond1, label %true1, label %false1, !prof !30
@@ -44,12 +58,26 @@ exit:
 ; Same code as previous function, but with hot profile count.
 ; So %dup should be duplicated into predecessors.
 define i32 @hot(i32 %a, ptr %p, ptr %q) !prof !22 {
-; CHECK-LABEL: hot
-; CHECK:       %entry
-; CHECK:       %true1
-; CHECK:       %false2
-; CHECK:       %false1
-; CHECK:       %true2
+; CHECK-LABEL: hot:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %true1
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    addl $2, %eax
+; CHECK-NEXT:    cmpl $5, %eax
+; CHECK-NEXT:    jge .LBB1_4
+; CHECK-NEXT:  .LBB1_5: # %false2
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB1_2: # %false1
+; CHECK-NEXT:    movl (%rdx), %eax
+; CHECK-NEXT:    addl $-3, %eax
+; CHECK-NEXT:    cmpl $5, %eax
+; CHECK-NEXT:    jl .LBB1_5
+; CHECK-NEXT:  .LBB1_4: # %true2
+; CHECK-NEXT:    xorl %edi, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond1 = icmp sgt i32 %a, 1
   br i1 %cond1, label %true1, label %false1, !prof !30

>From fc034b7ed036868b23a18fc261c2b559cc9520a0 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Thu, 21 Sep 2023 11:18:57 -0700
Subject: [PATCH 2/2] MachineBlockPlacement: Add tolerance to comparisons

The LLVM BFI data is susceptible to numerical "noise" in the lower bits.
To get a feeling for this, consider that we store `BlockFrequency` as
64-bit integers and branch weights as 32-bit integers. Rounding errors
arise naturally whenever a division by branch weights is not exactly
representable as an integer, and because block frequencies are scaled by
loop factors the absolute values of those errors can appear big.
Regardless, the errors should be small relative to the frequency
numbers.

- Add `BlockFrequency::almostEqual` function to check whether two
  frequencies are close enough to each other that the topmost N bits
  are equal and only lower bits differ.
- Change `MachineBlockPlacement::selectBestCandidateBlock` to be a
  two-pass algorithm that first determines the biggest frequency among
  the candidates and then performs a 2nd pass over the blocks with an
  "almostEqual" frequency, preferring the one that is already the
  layout successor or, failing that, the first one in the worklist.

The current value of 20 significant bits works well to reduce
unnecessary changes with my upcoming BFI precision changes.
---
 llvm/include/llvm/Support/BlockFrequency.h    |  23 ++
 llvm/lib/CodeGen/MachineBlockPlacement.cpp    |  40 ++-
 llvm/test/CodeGen/AArch64/cfi-fixup.ll        |  24 +-
 .../CodeGen/AArch64/machine-licm-sub-loop.ll  |  67 +++--
 llvm/test/CodeGen/AArch64/ragreedy-csr.ll     | 234 +++++++++---------
 llvm/test/CodeGen/AArch64/sink-and-fold.ll    |  46 ++--
 .../divergent-branch-uniform-condition.ll     |  31 +--
 .../CodeGen/AMDGPU/loop_header_nopred.mir     |  83 ++++---
 ...p-var-out-of-divergent-loop-swdev407790.ll |  70 +++---
 .../CodeGen/AMDGPU/memcpy-crash-issue63986.ll |  68 ++---
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll  |  51 ++--
 .../CodeGen/AMDGPU/optimize-negated-cond.ll   |  66 ++---
 .../AMDGPU/promote-constOffset-to-imm.ll      | 125 +++++-----
 llvm/test/CodeGen/ARM/code-placement.ll       |  11 +-
 llvm/test/CodeGen/ARM/loop-indexing.ll        |  23 +-
 llvm/test/CodeGen/AVR/rot.ll                  |  32 +--
 llvm/test/CodeGen/AVR/rotate.ll               |  18 +-
 llvm/test/CodeGen/AVR/shift.ll                |  16 +-
 .../Generic/machine-function-splitter.ll      |   1 -
 .../Hexagon/lsr-postinc-nested-loop.ll        |   4 +-
 llvm/test/CodeGen/Hexagon/prof-early-if.ll    |   2 +-
 llvm/test/CodeGen/Hexagon/swp-multi-loops.ll  |   8 +-
 .../Mips/indirect-jump-hazard/jumptables.ll   | 208 ++++++++--------
 llvm/test/CodeGen/Mips/jump-table-mul.ll      |  32 +--
 llvm/test/CodeGen/Mips/nacl-align.ll          |  10 +-
 llvm/test/CodeGen/Mips/pseudo-jump-fill.ll    |  14 +-
 .../PowerPC/2007-11-16-landingpad-split.ll    |  28 ++-
 llvm/test/CodeGen/PowerPC/branch-opt.ll       |   9 +-
 .../PowerPC/jump-tables-collapse-rotate.ll    |  39 ++-
 llvm/test/CodeGen/PowerPC/licm-tocReg.ll      |  44 ++--
 .../CodeGen/PowerPC/lsr-profitable-chain.ll   |  34 +--
 llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll   |   8 +-
 llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll   |  20 +-
 llvm/test/CodeGen/PowerPC/p10-spill-crun.ll   |  32 +--
 llvm/test/CodeGen/PowerPC/pr43527.ll          |   8 +-
 llvm/test/CodeGen/PowerPC/pr45448.ll          |  14 +-
 llvm/test/CodeGen/PowerPC/pr46759.ll          |  16 +-
 llvm/test/CodeGen/PowerPC/pr48519.ll          |  44 ++--
 .../CodeGen/PowerPC/stack-clash-prologue.ll   |  81 +++---
 .../CodeGen/RISCV/shrinkwrap-jump-table.ll    |  20 +-
 llvm/test/CodeGen/Thumb/pr42760.ll            |  33 +--
 .../Thumb2/LowOverheadLoops/memcall.ll        |  50 ++--
 .../CodeGen/Thumb2/bti-indirect-branches.ll   |  37 +--
 llvm/test/CodeGen/Thumb2/constant-hoisting.ll |  18 +-
 .../test/CodeGen/Thumb2/mve-blockplacement.ll |  72 +++---
 .../CodeGen/Thumb2/mve-float16regloops.ll     |  68 ++---
 .../CodeGen/Thumb2/mve-float32regloops.ll     | 132 +++++-----
 .../Thumb2/mve-gather-scatter-optimisation.ll | 153 ++++++------
 llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll  |  78 +++---
 llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll    |  64 +++--
 llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll   |  70 +++---
 llvm/test/CodeGen/VE/Scalar/br_jt.ll          |  64 ++---
 .../CodeGen/X86/2007-01-13-StackPtrIndex.ll   | 105 ++++----
 .../CodeGen/X86/2008-04-17-CoalescerBug.ll    |  18 +-
 llvm/test/CodeGen/X86/2009-08-12-badswitch.ll |   6 +-
 llvm/test/CodeGen/X86/block-placement.ll      |   2 +-
 llvm/test/CodeGen/X86/callbr-asm-outputs.ll   |  20 +-
 .../X86/code_placement_loop_rotation2.ll      |   2 +-
 .../CodeGen/X86/dag-update-nodetomatch.ll     |  47 ++--
 llvm/test/CodeGen/X86/dup-cost.ll             |   6 +-
 llvm/test/CodeGen/X86/hoist-invariant-load.ll |  30 ++-
 .../test/CodeGen/X86/loop-strength-reduce7.ll |  11 +-
 llvm/test/CodeGen/X86/mul-constant-result.ll  | 112 ++++-----
 llvm/test/CodeGen/X86/pic.ll                  |  22 +-
 llvm/test/CodeGen/X86/postalloc-coalescing.ll |   9 +-
 llvm/test/CodeGen/X86/pr38743.ll              |  10 +-
 llvm/test/CodeGen/X86/pr38795.ll              |  79 +++---
 llvm/test/CodeGen/X86/pr63692.ll              |   9 +-
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |   8 +-
 .../speculative-load-hardening-indirect.ll    |  24 +-
 .../CodeGen/X86/speculative-load-hardening.ll |  32 +--
 llvm/test/CodeGen/X86/switch.ll               |  21 +-
 .../X86/tail-dup-merge-loop-headers.ll        |  84 +++----
 llvm/test/CodeGen/X86/win-catchpad.ll         |   8 +-
 llvm/unittests/Support/BlockFrequencyTest.cpp |  30 +++
 75 files changed, 1679 insertions(+), 1559 deletions(-)

diff --git a/llvm/include/llvm/Support/BlockFrequency.h b/llvm/include/llvm/Support/BlockFrequency.h
index 12a753301b36aba..7c45360fee14051 100644
--- a/llvm/include/llvm/Support/BlockFrequency.h
+++ b/llvm/include/llvm/Support/BlockFrequency.h
@@ -14,6 +14,7 @@
 #define LLVM_SUPPORT_BLOCKFREQUENCY_H
 
 #include <cassert>
+#include <climits>
 #include <cstdint>
 #include <optional>
 
@@ -93,6 +94,28 @@ class BlockFrequency {
     return *this;
   }
 
+  /// Returns true if `this` exceeds `Other` by no more than
+  /// `this >> SignificantBits`, i.e. the relative difference is at most
+  /// about 2**(-SignificantBits). `this` must not be smaller than `Other`.
+  ///
+  /// The formula is related to comparing a "relative change" to a threshold.
+  /// When choosing the threshold as a negative power-of-two things can be
+  /// computed cheaply:
+  ///   with A = this->Frequency; B = Other.Frequency; T = SignificantBits
+  ///   relative_change(A, B) = abs(A - B) / A
+  ///         (A - B) / A  <=  2**(-T)
+  ///   <=>   A - B        <=  2**(-T) * A
+  ///   <=>   A - B        <=  shr(A, T)
+  bool almostEqual(BlockFrequency Other, unsigned SignificantBits = 20) const {
+    // Expect `this` to be greater or equal for an efficient implementation.
+    assert(Frequency >= Other.Frequency && "`Other` must not be greater");
+    assert(SignificantBits < sizeof(Frequency) * CHAR_BIT &&
+           "invalid SignificantBits value");
+    const uint64_t Diff = Frequency - Other.Frequency;
+    const uint64_t Threshold = Frequency >> SignificantBits;
+    return Diff <= Threshold;
+  }
+
   bool operator<(BlockFrequency RHS) const {
     return Frequency < RHS.Frequency;
   }
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 603c0e9600afc32..f15efae54aa559c 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1714,8 +1714,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
     return nullptr;
 
   bool IsEHPad = WorkList[0]->isEHPad();
+  bool ReverseOrder = IsEHPad;
 
-  MachineBasicBlock *BestBlock = nullptr;
+  MachineBasicBlock *ChainEnd = *std::prev(Chain.end());
   BlockFrequency BestFreq;
   for (MachineBasicBlock *MBB : WorkList) {
     assert(MBB->isEHPad() == IsEHPad &&
@@ -1730,7 +1731,9 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
 
     BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
     LLVM_DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> ";
-               MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+               MBFI->printBlockFreq(dbgs(), CandidateFreq)
+               << " (freq) layoutsucc " << ChainEnd->isLayoutSuccessor(MBB)
+               << '\n');
 
     // For ehpad, we layout the least probable first as to avoid jumping back
     // from least probable landingpads to more probable ones.
@@ -1750,12 +1753,33 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
     //                 +-------------------------------------+
     //                 V                                     |
     // OuterLp -> OuterCleanup -> Resume     InnerLp -> InnerCleanup
-    if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
+    if (BestFreq > BlockFrequency(0) &&
+        (ReverseOrder ^ (BestFreq >= CandidateFreq)))
       continue;
 
-    BestBlock = MBB;
     BestFreq = CandidateFreq;
   }
+  // Perform a 2nd pass to make the output more stable: Check for blocks with
+  // almost the same frequency as `BestFreq` and prefer the one that is already
+  // a successor to `ChainEnd` first or whichever comes first in `WorkList`.
+  MachineBasicBlock *BestBlock = nullptr;
+  for (MachineBasicBlock *MBB : WorkList) {
+    BlockChain &SuccChain = *BlockToChain[MBB];
+    if (&SuccChain == &Chain)
+      continue;
+    BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+    if (!ReverseOrder) {
+      if (!BestFreq.almostEqual(CandidateFreq))
+        continue;
+    } else if (!CandidateFreq.almostEqual(BestFreq)) {
+      continue;
+    }
+    // Give precedence to the block already following `ChainEnd`, otherwise take
+    // the first one we encounter.
+    if (BestBlock == nullptr || ChainEnd->isLayoutSuccessor(MBB)) {
+      BestBlock = MBB;
+    }
+  }
 
   return BestBlock;
 }
@@ -2112,8 +2136,12 @@ MachineBlockPlacement::findBestLoopTopHelper(
 
     BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
                                             LoopBlockSet);
-    if ((Gains > 0) && (Gains > BestGains ||
-        ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
+    LLVM_DEBUG(dbgs() << "    Candidate: " << getBlockName(Pred) << " freq ";
+               MBFI->printBlockFreq(dbgs(), Pred);
+               dbgs() << " layoutsucc " << Pred->isLayoutSuccessor(OldTop)
+                      << " gains " << Gains.getFrequency() << '\n');
+    if (Gains >= BestGains &&
+        (Gains > BestGains || Pred->isLayoutSuccessor(OldTop))) {
       BestPred = Pred;
       BestGains = Gains;
     }
diff --git a/llvm/test/CodeGen/AArch64/cfi-fixup.ll b/llvm/test/CodeGen/AArch64/cfi-fixup.ll
index 9a4ad3bb07ee364..e746ea745b92a8c 100644
--- a/llvm/test/CodeGen/AArch64/cfi-fixup.ll
+++ b/llvm/test/CodeGen/AArch64/cfi-fixup.ll
@@ -8,13 +8,13 @@ define i32 @f0(i32 %x) #0 {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    .cfi_remember_state
-; CHECK-NEXT:    cbz w0, .LBB0_4
+; CHECK-NEXT:    cbz w0, .LBB0_6
 ; CHECK-NEXT:  // %bb.1: // %entry
 ; CHECK-NEXT:    cmp w0, #2
-; CHECK-NEXT:    b.eq .LBB0_5
+; CHECK-NEXT:    b.eq .LBB0_4
 ; CHECK-NEXT:  // %bb.2: // %entry
 ; CHECK-NEXT:    cmp w0, #1
-; CHECK-NEXT:    b.ne .LBB0_6
+; CHECK-NEXT:    b.ne .LBB0_5
 ; CHECK-NEXT:  // %bb.3: // %if.then2
 ; CHECK-NEXT:    bl g1
 ; CHECK-NEXT:    add w0, w0, #1
@@ -22,27 +22,27 @@ define i32 @f0(i32 %x) #0 {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:  .LBB0_4: // %if.then5
 ; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    .cfi_remember_state
-; CHECK-NEXT:    mov w0, #1
+; CHECK-NEXT:    bl g0
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    sub w0, w8, w0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_5: // %if.then5
+; CHECK-NEXT:  .LBB0_5: // %if.end7
 ; CHECK-NEXT:    .cfi_restore_state
 ; CHECK-NEXT:    .cfi_remember_state
-; CHECK-NEXT:    bl g0
-; CHECK-NEXT:    mov w8, #1
-; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    mov w0, wzr
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w30
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_6: // %if.end7
+; CHECK-NEXT:  .LBB0_6:
 ; CHECK-NEXT:    .cfi_restore_state
-; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    mov w0, #1 // =0x1
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NEXT:    .cfi_restore w30
@@ -115,7 +115,7 @@ define i32 @f2(i32 %x) #0 {
 ; CHECK-NEXT:    cbz w0, .LBB2_2
 ; CHECK-NEXT:  // %bb.1: // %if.end
 ; CHECK-NEXT:    bl g1
-; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    sub w0, w8, w0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    .cfi_def_cfa_offset 0
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
index f6bbdf5d95d8705..2f9e56668ca4bbe 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
@@ -15,32 +15,46 @@ define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
 ; CHECK-NEXT:    and x12, x10, #0xfffffff0
 ; CHECK-NEXT:    add x13, x1, #32
 ; CHECK-NEXT:    add x14, x2, #16
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_2: // %for.cond1.for.cond.cleanup3_crit_edge.us
-; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_2: // in Loop: Header=BB0_6 Depth=1
+; CHECK-NEXT:    mov x18, xzr
+; CHECK-NEXT:  .LBB0_3: // %for.body4.us.preheader
+; CHECK-NEXT:    // in Loop: Header=BB0_6 Depth=1
+; CHECK-NEXT:    add x16, x18, x8
+; CHECK-NEXT:    add x17, x2, x18, lsl #1
+; CHECK-NEXT:    sub x18, x10, x18
+; CHECK-NEXT:    add x16, x1, x16, lsl #2
+; CHECK-NEXT:  .LBB0_4: // %for.body4.us
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=1
+; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ldrsh w3, [x17], #2
+; CHECK-NEXT:    ldr w4, [x16]
+; CHECK-NEXT:    subs x18, x18, #1
+; CHECK-NEXT:    madd w3, w3, w15, w4
+; CHECK-NEXT:    str w3, [x16], #4
+; CHECK-NEXT:    b.ne .LBB0_4
+; CHECK-NEXT:  .LBB0_5: // %for.cond1.for.cond.cleanup3_crit_edge.us
+; CHECK-NEXT:    // in Loop: Header=BB0_6 Depth=1
 ; CHECK-NEXT:    add x9, x9, #1
 ; CHECK-NEXT:    add x13, x13, x11
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    cmp x9, x10
 ; CHECK-NEXT:    b.eq .LBB0_10
-; CHECK-NEXT:  .LBB0_3: // %for.cond1.preheader.us
+; CHECK-NEXT:  .LBB0_6: // %for.cond1.preheader.us
 ; CHECK-NEXT:    // =>This Loop Header: Depth=1
-; CHECK-NEXT:    // Child Loop BB0_6 Depth 2
-; CHECK-NEXT:    // Child Loop BB0_9 Depth 2
+; CHECK-NEXT:    // Child Loop BB0_8 Depth 2
+; CHECK-NEXT:    // Child Loop BB0_4 Depth 2
 ; CHECK-NEXT:    ldrsh w15, [x2, x9, lsl #1]
 ; CHECK-NEXT:    cmp w0, #16
-; CHECK-NEXT:    b.hs .LBB0_5
-; CHECK-NEXT:  // %bb.4: // in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    mov x18, xzr
-; CHECK-NEXT:    b .LBB0_8
-; CHECK-NEXT:  .LBB0_5: // %vector.ph
-; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    b.lo .LBB0_2
+; CHECK-NEXT:  // %bb.7: // %vector.ph
+; CHECK-NEXT:    // in Loop: Header=BB0_6 Depth=1
 ; CHECK-NEXT:    dup v0.8h, w15
 ; CHECK-NEXT:    mov x16, x14
 ; CHECK-NEXT:    mov x17, x13
 ; CHECK-NEXT:    mov x18, x12
-; CHECK-NEXT:  .LBB0_6: // %vector.body
-; CHECK-NEXT:    // Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_8: // %vector.body
+; CHECK-NEXT:    // Parent Loop BB0_6 Depth=1
 ; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldp q1, q4, [x16, #-16]
 ; CHECK-NEXT:    subs x18, x18, #16
@@ -53,28 +67,13 @@ define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
 ; CHECK-NEXT:    smlal v6.4s, v0.4h, v4.4h
 ; CHECK-NEXT:    stp q3, q2, [x17, #-32]
 ; CHECK-NEXT:    stp q6, q5, [x17], #64
-; CHECK-NEXT:    b.ne .LBB0_6
-; CHECK-NEXT:  // %bb.7: // %middle.block
-; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    b.ne .LBB0_8
+; CHECK-NEXT:  // %bb.9: // %middle.block
+; CHECK-NEXT:    // in Loop: Header=BB0_6 Depth=1
 ; CHECK-NEXT:    cmp x12, x10
 ; CHECK-NEXT:    mov x18, x12
-; CHECK-NEXT:    b.eq .LBB0_2
-; CHECK-NEXT:  .LBB0_8: // %for.body4.us.preheader
-; CHECK-NEXT:    // in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    add x16, x18, x8
-; CHECK-NEXT:    add x17, x2, x18, lsl #1
-; CHECK-NEXT:    sub x18, x10, x18
-; CHECK-NEXT:    add x16, x1, x16, lsl #2
-; CHECK-NEXT:  .LBB0_9: // %for.body4.us
-; CHECK-NEXT:    // Parent Loop BB0_3 Depth=1
-; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    ldrsh w3, [x17], #2
-; CHECK-NEXT:    ldr w4, [x16]
-; CHECK-NEXT:    subs x18, x18, #1
-; CHECK-NEXT:    madd w3, w3, w15, w4
-; CHECK-NEXT:    str w3, [x16], #4
-; CHECK-NEXT:    b.ne .LBB0_9
-; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:    b.ne .LBB0_3
+; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:  .LBB0_10: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 5b501762418ef50..fa0ec49907658b5 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -24,7 +24,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    ldrh w8, [x0]
 ; CHECK-NEXT:    ldrh w9, [x1]
 ; CHECK-NEXT:    cmp w8, w9
-; CHECK-NEXT:    b.ne LBB0_47
+; CHECK-NEXT:    b.ne LBB0_2
 ; CHECK-NEXT:  ; %bb.1: ; %if.end
 ; CHECK-NEXT:    sub sp, sp, #64
 ; CHECK-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
@@ -41,8 +41,12 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:  Lloh1:
 ; CHECK-NEXT:    ldr x14, [x14, __DefaultRuneLocale@GOTPAGEOFF]
 ; CHECK-NEXT:    ldrsb x8, [x9, x11]
-; CHECK-NEXT:    tbz x8, #63, LBB0_3
-; CHECK-NEXT:  LBB0_2: ; %cond.false.i.i
+; CHECK-NEXT:    tbz x8, #63, LBB0_7
+; CHECK-NEXT:    b LBB0_6
+; CHECK-NEXT:  LBB0_2:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB0_3: ; %cond.false.i.i219
 ; CHECK-NEXT:    stp x9, x0, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    mov w1, #32768 ; =0x8000
@@ -61,32 +65,17 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    ldr w12, [sp, #4] ; 4-byte Folded Reload
 ; CHECK-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
-; CHECK-NEXT:    cbz w8, LBB0_4
-; CHECK-NEXT:    b LBB0_6
-; CHECK-NEXT:  LBB0_3: ; %cond.true.i.i
-; CHECK-NEXT:    add x8, x14, x8, lsl #2
-; CHECK-NEXT:    ldr w8, [x8, #60]
-; CHECK-NEXT:    and w8, w8, #0x8000
-; CHECK-NEXT:    cbnz w8, LBB0_6
-; CHECK-NEXT:  LBB0_4: ; %lor.rhs
-; CHECK-NEXT:    ldrsb x8, [x10, x11]
-; CHECK-NEXT:    tbnz x8, #63, LBB0_8
-; CHECK-NEXT:  ; %bb.5: ; %cond.true.i.i217
-; CHECK-NEXT:    add x8, x14, x8, lsl #2
-; CHECK-NEXT:    ldr w8, [x8, #60]
-; CHECK-NEXT:    and w8, w8, #0x8000
-; CHECK-NEXT:    cbz w8, LBB0_9
-; CHECK-NEXT:  LBB0_6: ; %while.body
+; CHECK-NEXT:    cbz w8, LBB0_10
+; CHECK-NEXT:  LBB0_4: ; %while.body
 ; CHECK-NEXT:    ldrb w8, [x9, x11]
 ; CHECK-NEXT:    ldrb w15, [x10, x11]
 ; CHECK-NEXT:    cmp w8, w15
-; CHECK-NEXT:    b.ne LBB0_42
-; CHECK-NEXT:  ; %bb.7: ; %if.end17
+; CHECK-NEXT:    b.ne LBB0_43
+; CHECK-NEXT:  ; %bb.5: ; %if.end17
 ; CHECK-NEXT:    add x11, x11, #1
 ; CHECK-NEXT:    ldrsb x8, [x9, x11]
-; CHECK-NEXT:    tbz x8, #63, LBB0_3
-; CHECK-NEXT:    b LBB0_2
-; CHECK-NEXT:  LBB0_8: ; %cond.false.i.i219
+; CHECK-NEXT:    tbz x8, #63, LBB0_7
+; CHECK-NEXT:  LBB0_6: ; %cond.false.i.i
 ; CHECK-NEXT:    stp x9, x0, [sp, #32] ; 16-byte Folded Spill
 ; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    mov w1, #32768 ; =0x8000
@@ -105,163 +94,174 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
 ; CHECK-NEXT:    ldr w12, [sp, #4] ; 4-byte Folded Reload
 ; CHECK-NEXT:    ldr x10, [sp, #8] ; 8-byte Folded Reload
 ; CHECK-NEXT:    ldr x0, [sp, #40] ; 8-byte Folded Reload
-; CHECK-NEXT:    cbnz w8, LBB0_6
-; CHECK-NEXT:  LBB0_9: ; %while.end
+; CHECK-NEXT:    cbz w8, LBB0_8
+; CHECK-NEXT:    b LBB0_4
+; CHECK-NEXT:  LBB0_7: ; %cond.true.i.i
+; CHECK-NEXT:    add x8, x14, x8, lsl #2
+; CHECK-NEXT:    ldr w8, [x8, #60]
+; CHECK-NEXT:    and w8, w8, #0x8000
+; CHECK-NEXT:    cbnz w8, LBB0_4
+; CHECK-NEXT:  LBB0_8: ; %lor.rhs
+; CHECK-NEXT:    ldrsb x8, [x10, x11]
+; CHECK-NEXT:    tbnz x8, #63, LBB0_3
+; CHECK-NEXT:  ; %bb.9: ; %cond.true.i.i217
+; CHECK-NEXT:    add x8, x14, x8, lsl #2
+; CHECK-NEXT:    ldr w8, [x8, #60]
+; CHECK-NEXT:    and w8, w8, #0x8000
+; CHECK-NEXT:    cbnz w8, LBB0_4
+; CHECK-NEXT:  LBB0_10: ; %while.end
 ; CHECK-NEXT:    orr w8, w13, w12
-; CHECK-NEXT:    cbnz w8, LBB0_24
-; CHECK-NEXT:  ; %bb.10: ; %if.then23
+; CHECK-NEXT:    cbnz w8, LBB0_25
+; CHECK-NEXT:  ; %bb.11: ; %if.then23
 ; CHECK-NEXT:    ldr x12, [x0, #16]
 ; CHECK-NEXT:    ldrb w8, [x9, x11]
 ; CHECK-NEXT:    ldrb w13, [x12]
 ; CHECK-NEXT:    cmp w13, #83
-; CHECK-NEXT:    b.eq LBB0_19
-; CHECK-NEXT:  LBB0_11: ; %while.cond59.preheader
-; CHECK-NEXT:    cbz w8, LBB0_23
-; CHECK-NEXT:  LBB0_12: ; %land.rhs.preheader
+; CHECK-NEXT:    b.eq LBB0_20
+; CHECK-NEXT:  LBB0_12: ; %while.cond59.preheader
+; CHECK-NEXT:    cbz w8, LBB0_24
+; CHECK-NEXT:  LBB0_13: ; %land.rhs.preheader
 ; CHECK-NEXT:    add x12, x9, x11
 ; CHECK-NEXT:    add x9, x10, x11
 ; CHECK-NEXT:    add x10, x12, #1
-; CHECK-NEXT:  LBB0_13: ; %land.rhs
+; CHECK-NEXT:  LBB0_14: ; %land.rhs
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrb w11, [x9], #1
-; CHECK-NEXT:    cbz w11, LBB0_23
-; CHECK-NEXT:  ; %bb.14: ; %while.body66
-; CHECK-NEXT:    ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    cmp w8, #42
-; CHECK-NEXT:    b.eq LBB0_18
+; CHECK-NEXT:    cbz w11, LBB0_24
 ; CHECK-NEXT:  ; %bb.15: ; %while.body66
-; CHECK-NEXT:    ; in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    ; in Loop: Header=BB0_14 Depth=1
+; CHECK-NEXT:    cmp w8, #42
+; CHECK-NEXT:    b.eq LBB0_19
+; CHECK-NEXT:  ; %bb.16: ; %while.body66
+; CHECK-NEXT:    ; in Loop: Header=BB0_14 Depth=1
 ; CHECK-NEXT:    cmp w11, #42
-; CHECK-NEXT:    b.eq LBB0_18
-; CHECK-NEXT:  ; %bb.16: ; %lor.lhs.false74
-; CHECK-NEXT:    ; in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    b.eq LBB0_19
+; CHECK-NEXT:  ; %bb.17: ; %lor.lhs.false74
+; CHECK-NEXT:    ; in Loop: Header=BB0_14 Depth=1
 ; CHECK-NEXT:    cmp w8, w11
 ; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    b.ne LBB0_43
-; CHECK-NEXT:  ; %bb.17: ; %lor.lhs.false74
-; CHECK-NEXT:    ; in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    b.ne LBB0_44
+; CHECK-NEXT:  ; %bb.18: ; %lor.lhs.false74
+; CHECK-NEXT:    ; in Loop: Header=BB0_14 Depth=1
 ; CHECK-NEXT:    cmp w8, #94
-; CHECK-NEXT:    b.eq LBB0_43
-; CHECK-NEXT:  LBB0_18: ; %if.then83
-; CHECK-NEXT:    ; in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    b.eq LBB0_44
+; CHECK-NEXT:  LBB0_19: ; %if.then83
+; CHECK-NEXT:    ; in Loop: Header=BB0_14 Depth=1
 ; CHECK-NEXT:    ldrb w8, [x10], #1
 ; CHECK-NEXT:    mov w0, #1 ; =0x1
-; CHECK-NEXT:    cbnz w8, LBB0_13
-; CHECK-NEXT:    b LBB0_43
-; CHECK-NEXT:  LBB0_19: ; %land.lhs.true28
-; CHECK-NEXT:    cbz w8, LBB0_23
-; CHECK-NEXT:  ; %bb.20: ; %land.lhs.true28
+; CHECK-NEXT:    cbnz w8, LBB0_14
+; CHECK-NEXT:    b LBB0_44
+; CHECK-NEXT:  LBB0_20: ; %land.lhs.true28
+; CHECK-NEXT:    cbz w8, LBB0_24
+; CHECK-NEXT:  ; %bb.21: ; %land.lhs.true28
 ; CHECK-NEXT:    cmp w8, #112
-; CHECK-NEXT:    b.ne LBB0_12
-; CHECK-NEXT:  ; %bb.21: ; %land.lhs.true35
+; CHECK-NEXT:    b.ne LBB0_13
+; CHECK-NEXT:  ; %bb.22: ; %land.lhs.true35
 ; CHECK-NEXT:    ldrb w13, [x10, x11]
 ; CHECK-NEXT:    cmp w13, #112
-; CHECK-NEXT:    b.ne LBB0_12
-; CHECK-NEXT:  ; %bb.22: ; %land.lhs.true43
+; CHECK-NEXT:    b.ne LBB0_13
+; CHECK-NEXT:  ; %bb.23: ; %land.lhs.true43
 ; CHECK-NEXT:    sub x12, x9, x12
 ; CHECK-NEXT:    add x12, x12, x11
 ; CHECK-NEXT:    cmp x12, #1
-; CHECK-NEXT:    b.ne LBB0_44
-; CHECK-NEXT:  LBB0_23:
+; CHECK-NEXT:    b.ne LBB0_45
+; CHECK-NEXT:  LBB0_24:
 ; CHECK-NEXT:    mov w0, #1 ; =0x1
-; CHECK-NEXT:    b LBB0_43
-; CHECK-NEXT:  LBB0_24: ; %if.else88
+; CHECK-NEXT:    b LBB0_44
+; CHECK-NEXT:  LBB0_25: ; %if.else88
 ; CHECK-NEXT:    cmp w12, #1
-; CHECK-NEXT:    b.ne LBB0_33
-; CHECK-NEXT:  ; %bb.25: ; %if.else88
+; CHECK-NEXT:    b.ne LBB0_34
+; CHECK-NEXT:  ; %bb.26: ; %if.else88
 ; CHECK-NEXT:    cmp w13, #2
-; CHECK-NEXT:    b.ne LBB0_33
-; CHECK-NEXT:  ; %bb.26: ; %while.cond95.preheader
+; CHECK-NEXT:    b.ne LBB0_34
+; CHECK-NEXT:  ; %bb.27: ; %while.cond95.preheader
 ; CHECK-NEXT:    ldrb w12, [x9, x11]
-; CHECK-NEXT:    cbz w12, LBB0_23
-; CHECK-NEXT:  ; %bb.27: ; %land.rhs99.preheader
+; CHECK-NEXT:    cbz w12, LBB0_24
+; CHECK-NEXT:  ; %bb.28: ; %land.rhs99.preheader
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    mov w0, #1 ; =0x1
-; CHECK-NEXT:    b LBB0_29
-; CHECK-NEXT:  LBB0_28: ; %if.then117
-; CHECK-NEXT:    ; in Loop: Header=BB0_29 Depth=1
+; CHECK-NEXT:    b LBB0_30
+; CHECK-NEXT:  LBB0_29: ; %if.then117
+; CHECK-NEXT:    ; in Loop: Header=BB0_30 Depth=1
 ; CHECK-NEXT:    add x12, x9, x8
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    add x12, x12, x11
 ; CHECK-NEXT:    ldrb w12, [x12, #1]
-; CHECK-NEXT:    cbz w12, LBB0_43
-; CHECK-NEXT:  LBB0_29: ; %land.rhs99
+; CHECK-NEXT:    cbz w12, LBB0_44
+; CHECK-NEXT:  LBB0_30: ; %land.rhs99
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add x13, x10, x8
 ; CHECK-NEXT:    ldrb w13, [x13, x11]
-; CHECK-NEXT:    cbz w13, LBB0_23
-; CHECK-NEXT:  ; %bb.30: ; %while.body104
-; CHECK-NEXT:    ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT:    cmp w12, w13
-; CHECK-NEXT:    b.eq LBB0_28
+; CHECK-NEXT:    cbz w13, LBB0_24
 ; CHECK-NEXT:  ; %bb.31: ; %while.body104
-; CHECK-NEXT:    ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT:    cmp w12, #42
-; CHECK-NEXT:    b.eq LBB0_28
+; CHECK-NEXT:    ; in Loop: Header=BB0_30 Depth=1
+; CHECK-NEXT:    cmp w12, w13
+; CHECK-NEXT:    b.eq LBB0_29
 ; CHECK-NEXT:  ; %bb.32: ; %while.body104
-; CHECK-NEXT:    ; in Loop: Header=BB0_29 Depth=1
+; CHECK-NEXT:    ; in Loop: Header=BB0_30 Depth=1
+; CHECK-NEXT:    cmp w12, #42
+; CHECK-NEXT:    b.eq LBB0_29
+; CHECK-NEXT:  ; %bb.33: ; %while.body104
+; CHECK-NEXT:    ; in Loop: Header=BB0_30 Depth=1
 ; CHECK-NEXT:    cmp w13, #94
-; CHECK-NEXT:    b.eq LBB0_28
-; CHECK-NEXT:    b LBB0_42
-; CHECK-NEXT:  LBB0_33: ; %if.else123
+; CHECK-NEXT:    b.eq LBB0_29
+; CHECK-NEXT:    b LBB0_43
+; CHECK-NEXT:  LBB0_34: ; %if.else123
 ; CHECK-NEXT:    cmp w13, #1
 ; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    b.ne LBB0_43
-; CHECK-NEXT:  ; %bb.34: ; %if.else123
+; CHECK-NEXT:    b.ne LBB0_44
+; CHECK-NEXT:  ; %bb.35: ; %if.else123
 ; CHECK-NEXT:    cmp w12, #2
-; CHECK-NEXT:    b.ne LBB0_43
-; CHECK-NEXT:  ; %bb.35: ; %while.cond130.preheader
+; CHECK-NEXT:    b.ne LBB0_44
+; CHECK-NEXT:  ; %bb.36: ; %while.cond130.preheader
 ; CHECK-NEXT:    ldrb w8, [x9, x11]
-; CHECK-NEXT:    cbz w8, LBB0_23
-; CHECK-NEXT:  ; %bb.36: ; %land.rhs134.preheader
+; CHECK-NEXT:    cbz w8, LBB0_24
+; CHECK-NEXT:  ; %bb.37: ; %land.rhs134.preheader
 ; CHECK-NEXT:    mov x12, xzr
 ; CHECK-NEXT:    mov w0, #1 ; =0x1
-; CHECK-NEXT:    b LBB0_38
-; CHECK-NEXT:  LBB0_37: ; %if.then152
-; CHECK-NEXT:    ; in Loop: Header=BB0_38 Depth=1
+; CHECK-NEXT:    b LBB0_39
+; CHECK-NEXT:  LBB0_38: ; %if.then152
+; CHECK-NEXT:    ; in Loop: Header=BB0_39 Depth=1
 ; CHECK-NEXT:    add x8, x9, x12
 ; CHECK-NEXT:    add x12, x12, #1
 ; CHECK-NEXT:    add x8, x8, x11
 ; CHECK-NEXT:    ldrb w8, [x8, #1]
-; CHECK-NEXT:    cbz w8, LBB0_43
-; CHECK-NEXT:  LBB0_38: ; %land.rhs134
+; CHECK-NEXT:    cbz w8, LBB0_44
+; CHECK-NEXT:  LBB0_39: ; %land.rhs134
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add x13, x10, x12
 ; CHECK-NEXT:    ldrb w13, [x13, x11]
-; CHECK-NEXT:    cbz w13, LBB0_23
-; CHECK-NEXT:  ; %bb.39: ; %while.body139
-; CHECK-NEXT:    ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT:    cmp w8, w13
-; CHECK-NEXT:    b.eq LBB0_37
+; CHECK-NEXT:    cbz w13, LBB0_24
 ; CHECK-NEXT:  ; %bb.40: ; %while.body139
-; CHECK-NEXT:    ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT:    cmp w13, #42
-; CHECK-NEXT:    b.eq LBB0_37
+; CHECK-NEXT:    ; in Loop: Header=BB0_39 Depth=1
+; CHECK-NEXT:    cmp w8, w13
+; CHECK-NEXT:    b.eq LBB0_38
 ; CHECK-NEXT:  ; %bb.41: ; %while.body139
-; CHECK-NEXT:    ; in Loop: Header=BB0_38 Depth=1
+; CHECK-NEXT:    ; in Loop: Header=BB0_39 Depth=1
+; CHECK-NEXT:    cmp w13, #42
+; CHECK-NEXT:    b.eq LBB0_38
+; CHECK-NEXT:  ; %bb.42: ; %while.body139
+; CHECK-NEXT:    ; in Loop: Header=BB0_39 Depth=1
 ; CHECK-NEXT:    cmp w8, #94
-; CHECK-NEXT:    b.eq LBB0_37
-; CHECK-NEXT:  LBB0_42:
-; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    b.eq LBB0_38
 ; CHECK-NEXT:  LBB0_43:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:  LBB0_44:
 ; CHECK-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  LBB0_44: ; %lor.lhs.false47
+; CHECK-NEXT:  LBB0_45: ; %lor.lhs.false47
 ; CHECK-NEXT:    cmp x12, #2
-; CHECK-NEXT:    b.ne LBB0_11
-; CHECK-NEXT:  ; %bb.45: ; %land.lhs.true52
+; CHECK-NEXT:    b.ne LBB0_12
+; CHECK-NEXT:  ; %bb.46: ; %land.lhs.true52
 ; CHECK-NEXT:    add x12, x9, x11
 ; CHECK-NEXT:    mov w0, #1 ; =0x1
 ; CHECK-NEXT:    ldurb w12, [x12, #-1]
 ; CHECK-NEXT:    cmp w12, #73
-; CHECK-NEXT:    b.eq LBB0_43
-; CHECK-NEXT:  ; %bb.46: ; %land.lhs.true52
-; CHECK-NEXT:    cbz w8, LBB0_43
-; CHECK-NEXT:    b LBB0_12
-; CHECK-NEXT:  LBB0_47:
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ret
+; CHECK-NEXT:    b.eq LBB0_44
+; CHECK-NEXT:  ; %bb.47: ; %land.lhs.true52
+; CHECK-NEXT:    cbz w8, LBB0_44
+; CHECK-NEXT:    b LBB0_13
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh0, Lloh1
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh2, Lloh3
 ; CHECK-NEXT:    .loh AdrpLdrGot Lloh4, Lloh5
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 632fdb391053121..0071fabea8bcdb3 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -142,40 +142,40 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast,+addr-
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov x20, x0
-; CHECK-NEXT:    b .LBB4_3
-; CHECK-NEXT:  .LBB4_2: // %LI.latch
-; CHECK-NEXT:    // in Loop: Header=BB4_3 Depth=1
-; CHECK-NEXT:    cmp x22, x19
-; CHECK-NEXT:    mov x22, x23
-; CHECK-NEXT:    b.ge .LBB4_8
-; CHECK-NEXT:  .LBB4_3: // %LI
-; CHECK-NEXT:    // =>This Loop Header: Depth=1
-; CHECK-NEXT:    // Child Loop BB4_6 Depth 2
-; CHECK-NEXT:    mov x21, xzr
-; CHECK-NEXT:    add x23, x22, #1
-; CHECK-NEXT:    b .LBB4_6
-; CHECK-NEXT:  .LBB4_4: // %if.else
-; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
+; CHECK-NEXT:    b .LBB4_7
+; CHECK-NEXT:  .LBB4_2: // %if.else
+; CHECK-NEXT:    // in Loop: Header=BB4_4 Depth=2
 ; CHECK-NEXT:    ldr w0, [x20, x22, lsl #2]
-; CHECK-NEXT:  .LBB4_5: // %LJ.latch
-; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
+; CHECK-NEXT:  .LBB4_3: // %LJ.latch
+; CHECK-NEXT:    // in Loop: Header=BB4_4 Depth=2
 ; CHECK-NEXT:    add x8, x21, #1
 ; CHECK-NEXT:    str w0, [x20, x21, lsl #2]
 ; CHECK-NEXT:    sub x9, x8, #1
 ; CHECK-NEXT:    mov x21, x8
 ; CHECK-NEXT:    cmp x9, x19
-; CHECK-NEXT:    b.ge .LBB4_2
-; CHECK-NEXT:  .LBB4_6: // %LJ
-; CHECK-NEXT:    // Parent Loop BB4_3 Depth=1
+; CHECK-NEXT:    b.ge .LBB4_6
+; CHECK-NEXT:  .LBB4_4: // %LJ
+; CHECK-NEXT:    // Parent Loop BB4_7 Depth=1
 ; CHECK-NEXT:    // => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldr w8, [x20, x21, lsl #2]
-; CHECK-NEXT:    tbz w8, #31, .LBB4_4
-; CHECK-NEXT:  // %bb.7: // %if.then
-; CHECK-NEXT:    // in Loop: Header=BB4_6 Depth=2
+; CHECK-NEXT:    tbz w8, #31, .LBB4_2
+; CHECK-NEXT:  // %bb.5: // %if.then
+; CHECK-NEXT:    // in Loop: Header=BB4_4 Depth=2
 ; CHECK-NEXT:    add x0, x20, x22, lsl #2
 ; CHECK-NEXT:    mov x1, x21
 ; CHECK-NEXT:    bl use
-; CHECK-NEXT:    b .LBB4_5
+; CHECK-NEXT:    b .LBB4_3
+; CHECK-NEXT:  .LBB4_6: // %LI.latch
+; CHECK-NEXT:    // in Loop: Header=BB4_7 Depth=1
+; CHECK-NEXT:    cmp x22, x19
+; CHECK-NEXT:    mov x22, x23
+; CHECK-NEXT:    b.ge .LBB4_8
+; CHECK-NEXT:  .LBB4_7: // %LI
+; CHECK-NEXT:    // =>This Loop Header: Depth=1
+; CHECK-NEXT:    // Child Loop BB4_4 Depth 2
+; CHECK-NEXT:    mov x21, xzr
+; CHECK-NEXT:    add x23, x22, #1
+; CHECK-NEXT:    b .LBB4_4
 ; CHECK-NEXT:  .LBB4_8:
 ; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index fadb774b0822296..7838f2b59fa4426 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -26,13 +26,18 @@ define amdgpu_ps void @main(i32 %0, float %1) {
 ; ISA-NEXT:    s_mov_b64 s[0:1], 0
 ; ISA-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; ISA-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; ISA-NEXT:    s_branch .LBB0_3
-; ISA-NEXT:  .LBB0_1: ; %Flow1
-; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; ISA-NEXT:    s_branch .LBB0_4
+; ISA-NEXT:  .LBB0_1: ; %endif1
+; ISA-NEXT:    ; in Loop: Header=BB0_4 Depth=1
+; ISA-NEXT:    s_mov_b64 s[4:5], -1
+; ISA-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; ISA-NEXT:    s_cbranch_execnz .LBB0_5
+; ISA-NEXT:  .LBB0_2: ; %Flow1
+; ISA-NEXT:    ; in Loop: Header=BB0_4 Depth=1
 ; ISA-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; ISA-NEXT:    s_mov_b64 s[6:7], 0
-; ISA-NEXT:  .LBB0_2: ; %Flow
-; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; ISA-NEXT:  .LBB0_3: ; %Flow
+; ISA-NEXT:    ; in Loop: Header=BB0_4 Depth=1
 ; ISA-NEXT:    s_and_b64 s[10:11], exec, s[4:5]
 ; ISA-NEXT:    s_or_b64 s[0:1], s[10:11], s[0:1]
 ; ISA-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
@@ -40,22 +45,18 @@ define amdgpu_ps void @main(i32 %0, float %1) {
 ; ISA-NEXT:    s_or_b64 s[2:3], s[2:3], s[6:7]
 ; ISA-NEXT:    s_andn2_b64 exec, exec, s[0:1]
 ; ISA-NEXT:    s_cbranch_execz .LBB0_6
-; ISA-NEXT:  .LBB0_3: ; %loop
+; ISA-NEXT:  .LBB0_4: ; %loop
 ; ISA-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; ISA-NEXT:    s_or_b64 s[4:5], s[4:5], exec
 ; ISA-NEXT:    s_cmp_lt_u32 s8, 32
 ; ISA-NEXT:    s_mov_b64 s[6:7], -1
-; ISA-NEXT:    s_cbranch_scc0 .LBB0_2
-; ISA-NEXT:  ; %bb.4: ; %endif1
-; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; ISA-NEXT:    s_mov_b64 s[4:5], -1
-; ISA-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; ISA-NEXT:    s_cbranch_execz .LBB0_1
-; ISA-NEXT:  ; %bb.5: ; %endif2
-; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; ISA-NEXT:    s_cbranch_scc1 .LBB0_1
+; ISA-NEXT:    s_branch .LBB0_3
+; ISA-NEXT:  .LBB0_5: ; %endif2
+; ISA-NEXT:    ; in Loop: Header=BB0_4 Depth=1
 ; ISA-NEXT:    s_add_i32 s8, s8, 1
 ; ISA-NEXT:    s_xor_b64 s[4:5], exec, -1
-; ISA-NEXT:    s_branch .LBB0_1
+; ISA-NEXT:    s_branch .LBB0_2
 ; ISA-NEXT:  .LBB0_6: ; %Flow2
 ; ISA-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; ISA-NEXT:    v_mov_b32_e32 v1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index b7e1dfb2eda28c0..eb07b05daca5e82 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -10,22 +10,17 @@ name:            loop_header_nopred
 body:             |
   ; GFX10-LABEL: name: loop_header_nopred
   ; GFX10: bb.0:
-  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT:   S_BRANCH %bb.2
-  ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT: bb.1 (align 64):
-  ; GFX10-NEXT:   successors: %bb.7(0x04000000), %bb.2(0x7c000000)
-  ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+  ; GFX10-NEXT:   S_BRANCH %bb.1
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT: bb.2:
-  ; GFX10-NEXT:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
+  ; GFX10-NEXT: bb.3 (align 64):
+  ; GFX10-NEXT:   successors: %bb.4(0x40000000), %bb.6(0x40000000)
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
   ; GFX10-NEXT: {{  $}}
-  ; GFX10-NEXT: bb.5:
-  ; GFX10-NEXT:   successors: %bb.1(0x04000000), %bb.5(0x7c000000)
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT:   successors: %bb.5(0x04000000), %bb.4(0x7c000000)
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT:   S_NOP 0
   ; GFX10-NEXT:   S_NOP 0
@@ -38,29 +33,43 @@ body:             |
   ; GFX10-NEXT:   S_NOP 0
   ; GFX10-NEXT:   S_NOP 0
   ; GFX10-NEXT:   S_NOP 0
-  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
-  ; GFX10-NEXT:   S_BRANCH %bb.1
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT:   successors: %bb.6(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.6:
+  ; GFX10-NEXT:   successors: %bb.7(0x04000000), %bb.1(0x7c000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_CBRANCH_VCCZ %bb.3, implicit $vcc_lo
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_BRANCH %bb.3
   ; GFX10-NEXT: {{  $}}
   ; GFX10-NEXT: bb.7:
   ; GFX10-NEXT:   S_ENDPGM 0
+  ;
   ; GFX11-LABEL: name: loop_header_nopred
   ; GFX11: bb.0:
-  ; GFX11-NEXT:   successors: %bb.2(0x80000000)
-  ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT:   S_BRANCH %bb.2
+  ; GFX11-NEXT:   successors: %bb.1(0x80000000)
   ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT: bb.1:
-  ; GFX11-NEXT:   successors: %bb.7(0x04000000), %bb.2(0x7c000000)
-  ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+  ; GFX11-NEXT:   S_BRANCH %bb.1
   ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT: bb.2:
-  ; GFX11-NEXT:   successors: %bb.5(0x40000000), %bb.1(0x40000000)
+  ; GFX11-NEXT: bb.3:
+  ; GFX11-NEXT:   successors: %bb.4(0x40000000), %bb.6(0x40000000)
   ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GFX11-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
   ; GFX11-NEXT: {{  $}}
-  ; GFX11-NEXT: bb.5:
-  ; GFX11-NEXT:   successors: %bb.1(0x04000000), %bb.5(0x7c000000)
+  ; GFX11-NEXT: bb.4:
+  ; GFX11-NEXT:   successors: %bb.5(0x04000000), %bb.4(0x7c000000)
   ; GFX11-NEXT: {{  $}}
   ; GFX11-NEXT:   S_NOP 0
   ; GFX11-NEXT:   S_NOP 0
@@ -73,8 +82,26 @@ body:             |
   ; GFX11-NEXT:   S_NOP 0
   ; GFX11-NEXT:   S_NOP 0
   ; GFX11-NEXT:   S_NOP 0
-  ; GFX11-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
-  ; GFX11-NEXT:   S_BRANCH %bb.1
+  ; GFX11-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT: bb.5:
+  ; GFX11-NEXT:   successors: %bb.6(0x80000000)
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT: bb.6:
+  ; GFX11-NEXT:   successors: %bb.7(0x04000000), %bb.1(0x7c000000)
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT: bb.1:
+  ; GFX11-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT:   S_CBRANCH_VCCZ %bb.3, implicit $vcc_lo
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT: bb.2:
+  ; GFX11-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT:   S_BRANCH %bb.3
   ; GFX11-NEXT: {{  $}}
   ; GFX11-NEXT: bb.7:
   ; GFX11-NEXT:   S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
index e2456b74f7ef1fa..363428335b99345 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
@@ -16,36 +16,10 @@ define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
 ; CHECK-NEXT:    s_xor_b32 s6, s4, -1
 ; CHECK-NEXT:    s_inst_prefetch 0x1
-; CHECK-NEXT:    s_branch .LBB0_3
-; CHECK-NEXT:    .p2align 6
-; CHECK-NEXT:  .LBB0_1: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
-; CHECK-NEXT:  .LBB0_2: ; %Flow1
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s7
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s4, 0, v3
-; CHECK-NEXT:    ;;#ASMSTART
-; CHECK-NEXT:    ; j lastloop entry
-; CHECK-NEXT:    ;;#ASMEND
-; CHECK-NEXT:    s_or_b32 s5, s4, s5
-; CHECK-NEXT:    s_andn2_b32 exec_lo, exec_lo, s5
-; CHECK-NEXT:    s_cbranch_execz .LBB0_8
-; CHECK-NEXT:  .LBB0_3: ; %for.body33
-; CHECK-NEXT:    ; =>This Loop Header: Depth=1
-; CHECK-NEXT:    ; Child Loop BB0_6 Depth 2
-; CHECK-NEXT:    v_mov_b32_e32 v4, 0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0
-; CHECK-NEXT:    s_and_saveexec_b32 s7, s6
-; CHECK-NEXT:    s_cbranch_execz .LBB0_2
-; CHECK-NEXT:  ; %bb.4: ; %for.body51.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    s_mov_b32 s8, 0
-; CHECK-NEXT:    s_mov_b32 s9, 0
 ; CHECK-NEXT:    s_branch .LBB0_6
 ; CHECK-NEXT:    .p2align 6
-; CHECK-NEXT:  .LBB0_5: ; %if.end118
-; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT:  .LBB0_1: ; %if.end118
+; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; CHECK-NEXT:    s_add_i32 s9, s9, 4
 ; CHECK-NEXT:    ;;#ASMSTART
@@ -56,20 +30,46 @@ define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49
 ; CHECK-NEXT:    v_add_nc_u32_e32 v4, -4, v4
 ; CHECK-NEXT:    s_or_b32 s8, s4, s8
 ; CHECK-NEXT:    s_andn2_b32 exec_lo, exec_lo, s8
-; CHECK-NEXT:    s_cbranch_execz .LBB0_1
-; CHECK-NEXT:  .LBB0_6: ; %for.body51
-; CHECK-NEXT:    ; Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_4
+; CHECK-NEXT:  .LBB0_2: ; %for.body51
+; CHECK-NEXT:    ; Parent Loop BB0_6 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 1
 ; CHECK-NEXT:    s_and_saveexec_b32 s4, vcc_lo
-; CHECK-NEXT:    s_cbranch_execz .LBB0_5
-; CHECK-NEXT:  ; %bb.7: ; %if.then112
-; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT:    s_cbranch_execz .LBB0_1
+; CHECK-NEXT:  ; %bb.3: ; %if.then112
+; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; CHECK-NEXT:    s_add_i32 s10, s9, 4
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v4, s10
 ; CHECK-NEXT:    ds_write_b32 v1, v4
-; CHECK-NEXT:    s_branch .LBB0_5
+; CHECK-NEXT:    s_branch .LBB0_1
+; CHECK-NEXT:    .p2align 6
+; CHECK-NEXT:  .LBB0_4: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=1
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:  .LBB0_5: ; %Flow1
+; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=1
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s7
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s4, 0, v3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; j lastloop entry
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_or_b32 s5, s4, s5
+; CHECK-NEXT:    s_andn2_b32 exec_lo, exec_lo, s5
+; CHECK-NEXT:    s_cbranch_execz .LBB0_8
+; CHECK-NEXT:  .LBB0_6: ; %for.body33
+; CHECK-NEXT:    ; =>This Loop Header: Depth=1
+; CHECK-NEXT:    ; Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    s_and_saveexec_b32 s7, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB0_5
+; CHECK-NEXT:  ; %bb.7: ; %for.body51.preheader
+; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=1
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    s_mov_b32 s9, 0
+; CHECK-NEXT:    s_branch .LBB0_2
 ; CHECK-NEXT:  .LBB0_8: ; %for.body159.preheader
 ; CHECK-NEXT:    s_inst_prefetch 0x2
 ; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s5
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 0e6c1aecb6774a3..c777d1183deb669 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -102,28 +102,14 @@ define void @issue63986(i64 %0, i64 %idxprom) {
 ; CHECK-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
 ; CHECK-NEXT:    v_add_co_u32_e32 v8, vcc, v2, v0
 ; CHECK-NEXT:    v_addc_co_u32_e32 v9, vcc, v3, v1, vcc
-; CHECK-NEXT:    s_branch .LBB0_12
-; CHECK-NEXT:  .LBB0_10: ; %Flow19
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
-; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
-; CHECK-NEXT:    s_mov_b64 s[8:9], 0
-; CHECK-NEXT:  .LBB0_11: ; %Flow21
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
-; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_vccz .LBB0_20
-; CHECK-NEXT:  .LBB0_12: ; %while.cond
-; CHECK-NEXT:    ; =>This Loop Header: Depth=1
-; CHECK-NEXT:    ; Child Loop BB0_14 Depth 2
-; CHECK-NEXT:    ; Child Loop BB0_18 Depth 2
-; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_15
-; CHECK-NEXT:  ; %bb.13: ; %loop-memcpy-expansion2.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    s_branch .LBB0_19
+; CHECK-NEXT:  .LBB0_10: ; %loop-memcpy-expansion2.preheader
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[10:11], 0
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
 ; CHECK-NEXT:    s_mov_b64 s[14:15], 0
-; CHECK-NEXT:  .LBB0_14: ; %loop-memcpy-expansion2
-; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
+; CHECK-NEXT:  .LBB0_11: ; %loop-memcpy-expansion2
+; CHECK-NEXT:    ; Parent Loop BB0_19 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    v_mov_b32_e32 v10, s10
 ; CHECK-NEXT:    v_mov_b32_e32 v11, s11
@@ -169,23 +155,23 @@ define void @issue63986(i64 %0, i64 %idxprom) {
 ; CHECK-NEXT:    flat_store_byte v[10:11], v20 offset:13
 ; CHECK-NEXT:    flat_store_byte v[10:11], v27 offset:12
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_14
-; CHECK-NEXT:  .LBB0_15: ; %Flow20
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_11
+; CHECK-NEXT:  .LBB0_12: ; %Flow20
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], -1
-; CHECK-NEXT:    s_cbranch_execz .LBB0_11
-; CHECK-NEXT:  ; %bb.16: ; %loop-memcpy-residual-header5
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_18
+; CHECK-NEXT:  ; %bb.13: ; %loop-memcpy-residual-header5
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
 ; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
 ; CHECK-NEXT:    s_xor_b64 s[10:11], exec, s[8:9]
-; CHECK-NEXT:    s_cbranch_execz .LBB0_10
-; CHECK-NEXT:  ; %bb.17: ; %loop-memcpy-residual4.preheader
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    s_cbranch_execz .LBB0_17
+; CHECK-NEXT:  ; %bb.14: ; %loop-memcpy-residual4.preheader
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
 ; CHECK-NEXT:    s_mov_b64 s[12:13], 0
 ; CHECK-NEXT:    s_mov_b64 s[14:15], 0
-; CHECK-NEXT:  .LBB0_18: ; %loop-memcpy-residual4
-; CHECK-NEXT:    ; Parent Loop BB0_12 Depth=1
+; CHECK-NEXT:  .LBB0_15: ; %loop-memcpy-residual4
+; CHECK-NEXT:    ; Parent Loop BB0_19 Depth=1
 ; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    v_mov_b32_e32 v12, s15
 ; CHECK-NEXT:    v_add_co_u32_e32 v10, vcc, s14, v0
@@ -200,11 +186,25 @@ define void @issue63986(i64 %0, i64 %idxprom) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    flat_store_byte v[10:11], v13
 ; CHECK-NEXT:    s_andn2_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_cbranch_execnz .LBB0_18
-; CHECK-NEXT:  ; %bb.19: ; %Flow
-; CHECK-NEXT:    ; in Loop: Header=BB0_12 Depth=1
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_15
+; CHECK-NEXT:  ; %bb.16: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[12:13]
-; CHECK-NEXT:    s_branch .LBB0_10
+; CHECK-NEXT:  .LBB0_17: ; %Flow19
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
+; CHECK-NEXT:    s_or_b64 exec, exec, s[10:11]
+; CHECK-NEXT:    s_mov_b64 s[8:9], 0
+; CHECK-NEXT:  .LBB0_18: ; %Flow21
+; CHECK-NEXT:    ; in Loop: Header=BB0_19 Depth=1
+; CHECK-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
+; CHECK-NEXT:    s_cbranch_vccz .LBB0_20
+; CHECK-NEXT:  .LBB0_19: ; %while.cond
+; CHECK-NEXT:    ; =>This Loop Header: Depth=1
+; CHECK-NEXT:    ; Child Loop BB0_11 Depth 2
+; CHECK-NEXT:    ; Child Loop BB0_15 Depth 2
+; CHECK-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
+; CHECK-NEXT:    s_cbranch_execnz .LBB0_10
+; CHECK-NEXT:    s_branch .LBB0_12
 ; CHECK-NEXT:  .LBB0_20: ; %DummyReturnBlock
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index f7479c847559254..9e383ec0b1bfcf2 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
@@ -46,46 +46,45 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
 ; GCN:       ; %bb.0: ; %main_body
 ; GCN-NEXT:    s_mov_b64 s[0:1], 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_branch .LBB0_2
-; GCN-NEXT:  .LBB0_1: ; %loop.exit.guard
-; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
-; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GCN-NEXT:    s_cbranch_execz .LBB0_6
-; GCN-NEXT:  .LBB0_2: ; %LOOP.outer
-; GCN-NEXT:    ; =>This Loop Header: Depth=1
-; GCN-NEXT:    ; Child Loop BB0_4 Depth 2
-; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
-; GCN-NEXT:    ; implicit-def: $sgpr2_sgpr3
-; GCN-NEXT:    s_mov_b64 s[4:5], 0
-; GCN-NEXT:    s_branch .LBB0_4
-; GCN-NEXT:  .LBB0_3: ; %Flow
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_branch .LBB0_5
+; GCN-NEXT:  .LBB0_1: ; %Flow
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
 ; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
 ; GCN-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
 ; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GCN-NEXT:    s_cbranch_execz .LBB0_1
-; GCN-NEXT:  .LBB0_4: ; %LOOP
-; GCN-NEXT:    ; Parent Loop BB0_2 Depth=1
+; GCN-NEXT:    s_cbranch_execz .LBB0_4
+; GCN-NEXT:  .LBB0_2: ; %LOOP
+; GCN-NEXT:    ; Parent Loop BB0_5 Depth=1
 ; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, v0, v4
 ; GCN-NEXT:    s_or_b64 s[2:3], s[2:3], exec
 ; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], exec
 ; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB0_3
-; GCN-NEXT:  ; %bb.5: ; %ENDIF
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_cbranch_execz .LBB0_1
+; GCN-NEXT:  ; %bb.3: ; %ENDIF
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
 ; GCN-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
 ; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, v5, v0
 ; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
 ; GCN-NEXT:    s_and_b64 s[10:11], vcc, exec
 ; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
-; GCN-NEXT:    s_branch .LBB0_3
-; GCN-NEXT:  .LBB0_6: ; %IF
+; GCN-NEXT:    s_branch .LBB0_1
+; GCN-NEXT:  .LBB0_4: ; %loop.exit.guard
+; GCN-NEXT:    ; in Loop: Header=BB0_5 Depth=1
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
+; GCN-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GCN-NEXT:  .LBB0_5: ; %LOOP.outer
+; GCN-NEXT:    ; =>This Loop Header: Depth=1
+; GCN-NEXT:    ; Child Loop BB0_2 Depth 2
+; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
+; GCN-NEXT:    ; implicit-def: $sgpr2_sgpr3
+; GCN-NEXT:    s_mov_b64 s[4:5], 0
+; GCN-NEXT:    s_branch .LBB0_2
+; GCN-NEXT:  ; %bb.6: ; %IF
 ; GCN-NEXT:    s_endpgm
 main_body:
   br label %LOOP.outer
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
index f284df4d8a70b1b..42389d63dfc4f4b 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
@@ -11,47 +11,31 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    s_branch .LBB0_2
-; GCN-NEXT:  .LBB0_1: ; %loop.exit.guard
-; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_and_b64 vcc, exec, s[14:15]
-; GCN-NEXT:    s_cbranch_vccnz .LBB0_9
-; GCN-NEXT:  .LBB0_2: ; %bb1
-; GCN-NEXT:    ; =>This Loop Header: Depth=1
-; GCN-NEXT:    ; Child Loop BB0_4 Depth 2
-; GCN-NEXT:    s_mov_b32 s11, s7
-; GCN-NEXT:    buffer_load_dword v1, off, s[8:11], 0
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, v1
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
-; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
-; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
-; GCN-NEXT:    s_mov_b32 s12, s6
-; GCN-NEXT:    s_branch .LBB0_4
-; GCN-NEXT:  .LBB0_3: ; %Flow1
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_branch .LBB0_8
+; GCN-NEXT:  .LBB0_1: ; %Flow1
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
-; GCN-NEXT:    s_cbranch_vccz .LBB0_1
-; GCN-NEXT:  .LBB0_4: ; %bb2
-; GCN-NEXT:    ; Parent Loop BB0_2 Depth=1
+; GCN-NEXT:    s_cbranch_vccz .LBB0_7
+; GCN-NEXT:  .LBB0_2: ; %bb2
+; GCN-NEXT:    ; Parent Loop BB0_8 Depth=1
 ; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
 ; GCN-NEXT:    s_lshl_b32 s12, s12, 5
-; GCN-NEXT:    s_cbranch_vccz .LBB0_6
-; GCN-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_cbranch_vccz .LBB0_4
+; GCN-NEXT:  ; %bb.3: ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_mov_b64 s[14:15], s[2:3]
-; GCN-NEXT:    s_branch .LBB0_7
-; GCN-NEXT:  .LBB0_6: ; %bb3
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_branch .LBB0_5
+; GCN-NEXT:  .LBB0_4: ; %bb3
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_add_i32 s12, s12, 1
 ; GCN-NEXT:    s_mov_b64 s[14:15], -1
-; GCN-NEXT:  .LBB0_7: ; %Flow
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:  .LBB0_5: ; %Flow
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
 ; GCN-NEXT:    s_mov_b64 s[16:17], -1
-; GCN-NEXT:    s_cbranch_vccnz .LBB0_3
-; GCN-NEXT:  ; %bb.8: ; %bb4
-; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
+; GCN-NEXT:    s_cbranch_vccnz .LBB0_1
+; GCN-NEXT:  ; %bb.6: ; %bb4
+; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=2
 ; GCN-NEXT:    s_ashr_i32 s13, s12, 31
 ; GCN-NEXT:    s_lshl_b64 s[16:17], s[12:13], 2
 ; GCN-NEXT:    s_mov_b64 s[14:15], 0
@@ -60,7 +44,23 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
 ; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
 ; GCN-NEXT:    s_cmp_eq_u32 s12, 32
 ; GCN-NEXT:    s_cselect_b64 s[16:17], -1, 0
-; GCN-NEXT:    s_branch .LBB0_3
+; GCN-NEXT:    s_branch .LBB0_1
+; GCN-NEXT:  .LBB0_7: ; %loop.exit.guard
+; GCN-NEXT:    ; in Loop: Header=BB0_8 Depth=1
+; GCN-NEXT:    s_and_b64 vcc, exec, s[14:15]
+; GCN-NEXT:    s_cbranch_vccnz .LBB0_9
+; GCN-NEXT:  .LBB0_8: ; %bb1
+; GCN-NEXT:    ; =>This Loop Header: Depth=1
+; GCN-NEXT:    ; Child Loop BB0_2 Depth 2
+; GCN-NEXT:    s_mov_b32 s11, s7
+; GCN-NEXT:    buffer_load_dword v1, off, s[8:11], 0
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, v1
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
+; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
+; GCN-NEXT:    s_mov_b32 s12, s6
+; GCN-NEXT:    s_branch .LBB0_2
 ; GCN-NEXT:  .LBB0_9: ; %DummyReturnBlock
 ; GCN-NEXT:    s_endpgm
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a462c19ce645d4a..b23fa568c6466b4 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -451,14 +451,22 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX8-NEXT:    s_movk_i32 s0, 0x7f
-; GFX8-NEXT:  .LBB1_1: ; %for.cond.preheader
+; GFX8-NEXT:    s_branch .LBB1_3
+; GFX8-NEXT:  .LBB1_1: ; %while.cond.loopexit
+; GFX8-NEXT:    ; in Loop: Header=BB1_3 Depth=1
+; GFX8-NEXT:    s_add_i32 s1, s0, -1
+; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
+; GFX8-NEXT:    s_cbranch_scc1 .LBB1_5
+; GFX8-NEXT:  ; %bb.2: ; in Loop: Header=BB1_3 Depth=1
+; GFX8-NEXT:    s_mov_b32 s0, s1
+; GFX8-NEXT:  .LBB1_3: ; %for.cond.preheader
 ; GFX8-NEXT:    ; =>This Loop Header: Depth=1
-; GFX8-NEXT:    ; Child Loop BB1_2 Depth 2
+; GFX8-NEXT:    ; Child Loop BB1_4 Depth 2
 ; GFX8-NEXT:    v_mov_b32_e32 v6, v2
 ; GFX8-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX8-NEXT:    s_mov_b32 s1, 0
-; GFX8-NEXT:  .LBB1_2: ; %for.body
-; GFX8-NEXT:    ; Parent Loop BB1_1 Depth=1
+; GFX8-NEXT:  .LBB1_4: ; %for.body
+; GFX8-NEXT:    ; Parent Loop BB1_3 Depth=1
 ; GFX8-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0xffffb000, v5
 ; GFX8-NEXT:    v_addc_u32_e32 v8, vcc, -1, v6, vcc
@@ -528,14 +536,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v27, v3
 ; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, v28, v4, vcc
-; GFX8-NEXT:    s_cbranch_scc0 .LBB1_2
-; GFX8-NEXT:  ; %bb.3: ; %while.cond.loopexit
-; GFX8-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX8-NEXT:    s_add_i32 s1, s0, -1
-; GFX8-NEXT:    s_cmp_eq_u32 s0, 0
-; GFX8-NEXT:    s_cbranch_scc1 .LBB1_5
-; GFX8-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX8-NEXT:    s_mov_b32 s0, s1
+; GFX8-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GFX8-NEXT:    s_branch .LBB1_1
 ; GFX8-NEXT:  .LBB1_5: ; %while.end
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s35
@@ -582,14 +583,22 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    s_movk_i32 s0, 0xd000
 ; GFX900-NEXT:    s_movk_i32 s1, 0xe000
 ; GFX900-NEXT:    s_movk_i32 s3, 0xf000
-; GFX900-NEXT:  .LBB1_1: ; %for.cond.preheader
+; GFX900-NEXT:    s_branch .LBB1_3
+; GFX900-NEXT:  .LBB1_1: ; %while.cond.loopexit
+; GFX900-NEXT:    ; in Loop: Header=BB1_3 Depth=1
+; GFX900-NEXT:    s_add_i32 s4, s2, -1
+; GFX900-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX900-NEXT:    s_cbranch_scc1 .LBB1_5
+; GFX900-NEXT:  ; %bb.2: ; in Loop: Header=BB1_3 Depth=1
+; GFX900-NEXT:    s_mov_b32 s2, s4
+; GFX900-NEXT:  .LBB1_3: ; %for.cond.preheader
 ; GFX900-NEXT:    ; =>This Loop Header: Depth=1
-; GFX900-NEXT:    ; Child Loop BB1_2 Depth 2
+; GFX900-NEXT:    ; Child Loop BB1_4 Depth 2
 ; GFX900-NEXT:    v_mov_b32_e32 v6, v2
 ; GFX900-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX900-NEXT:    s_mov_b32 s4, 0
-; GFX900-NEXT:  .LBB1_2: ; %for.body
-; GFX900-NEXT:    ; Parent Loop BB1_1 Depth=1
+; GFX900-NEXT:  .LBB1_4: ; %for.body
+; GFX900-NEXT:    ; Parent Loop BB1_3 Depth=1
 ; GFX900-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GFX900-NEXT:    v_add_co_u32_e32 v7, vcc, 0xffffb000, v5
 ; GFX900-NEXT:    v_addc_co_u32_e32 v8, vcc, -1, v6, vcc
@@ -646,14 +655,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
 ; GFX900-NEXT:    v_add_co_u32_e32 v3, vcc, v29, v3
 ; GFX900-NEXT:    v_addc_co_u32_e32 v4, vcc, v30, v4, vcc
-; GFX900-NEXT:    s_cbranch_scc0 .LBB1_2
-; GFX900-NEXT:  ; %bb.3: ; %while.cond.loopexit
-; GFX900-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT:    s_add_i32 s4, s2, -1
-; GFX900-NEXT:    s_cmp_eq_u32 s2, 0
-; GFX900-NEXT:    s_cbranch_scc1 .LBB1_5
-; GFX900-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX900-NEXT:    s_mov_b32 s2, s4
+; GFX900-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GFX900-NEXT:    s_branch .LBB1_1
 ; GFX900-NEXT:  .LBB1_5: ; %while.end
 ; GFX900-NEXT:    v_mov_b32_e32 v1, s35
@@ -695,14 +697,22 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, s35, v2, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, 0x5000, v1
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
-; GFX10-NEXT:  .LBB1_1: ; %for.cond.preheader
+; GFX10-NEXT:    s_branch .LBB1_3
+; GFX10-NEXT:  .LBB1_1: ; %while.cond.loopexit
+; GFX10-NEXT:    ; in Loop: Header=BB1_3 Depth=1
+; GFX10-NEXT:    s_add_i32 s0, s1, -1
+; GFX10-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX10-NEXT:    s_cbranch_scc1 .LBB1_5
+; GFX10-NEXT:  ; %bb.2: ; in Loop: Header=BB1_3 Depth=1
+; GFX10-NEXT:    s_mov_b32 s1, s0
+; GFX10-NEXT:  .LBB1_3: ; %for.cond.preheader
 ; GFX10-NEXT:    ; =>This Loop Header: Depth=1
-; GFX10-NEXT:    ; Child Loop BB1_2 Depth 2
+; GFX10-NEXT:    ; Child Loop BB1_4 Depth 2
 ; GFX10-NEXT:    v_mov_b32_e32 v6, v2
 ; GFX10-NEXT:    v_mov_b32_e32 v5, v1
 ; GFX10-NEXT:    s_mov_b32 s2, 0
-; GFX10-NEXT:  .LBB1_2: ; %for.body
-; GFX10-NEXT:    ; Parent Loop BB1_1 Depth=1
+; GFX10-NEXT:  .LBB1_4: ; %for.body
+; GFX10-NEXT:    ; Parent Loop BB1_3 Depth=1
 ; GFX10-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GFX10-NEXT:    v_add_co_u32 v7, vcc_lo, v5, 0xffffb800
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, -1, v6, vcc_lo
@@ -761,14 +771,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v3, vcc_lo, v31, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v32, v4, vcc_lo
-; GFX10-NEXT:    s_cbranch_scc0 .LBB1_2
-; GFX10-NEXT:  ; %bb.3: ; %while.cond.loopexit
-; GFX10-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX10-NEXT:    s_add_i32 s0, s1, -1
-; GFX10-NEXT:    s_cmp_eq_u32 s1, 0
-; GFX10-NEXT:    s_cbranch_scc1 .LBB1_5
-; GFX10-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX10-NEXT:    s_mov_b32 s1, s0
+; GFX10-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GFX10-NEXT:    s_branch .LBB1_1
 ; GFX10-NEXT:  .LBB1_5: ; %while.end
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, s34, v0
@@ -813,13 +816,21 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    s_movk_i32 s0, 0xd000
 ; GFX90A-NEXT:    s_movk_i32 s1, 0xe000
 ; GFX90A-NEXT:    s_movk_i32 s3, 0xf000
-; GFX90A-NEXT:  .LBB1_1: ; %for.cond.preheader
+; GFX90A-NEXT:    s_branch .LBB1_3
+; GFX90A-NEXT:  .LBB1_1: ; %while.cond.loopexit
+; GFX90A-NEXT:    ; in Loop: Header=BB1_3 Depth=1
+; GFX90A-NEXT:    s_add_i32 s4, s2, -1
+; GFX90A-NEXT:    s_cmp_eq_u32 s2, 0
+; GFX90A-NEXT:    s_cbranch_scc1 .LBB1_5
+; GFX90A-NEXT:  ; %bb.2: ; in Loop: Header=BB1_3 Depth=1
+; GFX90A-NEXT:    s_mov_b32 s2, s4
+; GFX90A-NEXT:  .LBB1_3: ; %for.cond.preheader
 ; GFX90A-NEXT:    ; =>This Loop Header: Depth=1
-; GFX90A-NEXT:    ; Child Loop BB1_2 Depth 2
+; GFX90A-NEXT:    ; Child Loop BB1_4 Depth 2
 ; GFX90A-NEXT:    v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[0,1]
 ; GFX90A-NEXT:    s_mov_b32 s4, 0
-; GFX90A-NEXT:  .LBB1_2: ; %for.body
-; GFX90A-NEXT:    ; Parent Loop BB1_1 Depth=1
+; GFX90A-NEXT:  .LBB1_4: ; %for.body
+; GFX90A-NEXT:    ; Parent Loop BB1_3 Depth=1
 ; GFX90A-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GFX90A-NEXT:    v_add_co_u32_e32 v12, vcc, 0xffffb000, v6
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v13, vcc, -1, v7, vcc
@@ -876,14 +887,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_add_co_u32_e32 v4, vcc, v30, v1
 ; GFX90A-NEXT:    v_addc_co_u32_e32 v5, vcc, v31, v5, vcc
-; GFX90A-NEXT:    s_cbranch_scc0 .LBB1_2
-; GFX90A-NEXT:  ; %bb.3: ; %while.cond.loopexit
-; GFX90A-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT:    s_add_i32 s4, s2, -1
-; GFX90A-NEXT:    s_cmp_eq_u32 s2, 0
-; GFX90A-NEXT:    s_cbranch_scc1 .LBB1_5
-; GFX90A-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX90A-NEXT:    s_mov_b32 s2, s4
+; GFX90A-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GFX90A-NEXT:    s_branch .LBB1_1
 ; GFX90A-NEXT:  .LBB1_5: ; %while.end
 ; GFX90A-NEXT:    v_mov_b32_e32 v1, s35
@@ -918,14 +922,22 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX11-NEXT:    v_add_co_u32 v1, vcc_lo, 0x5000, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
-; GFX11-NEXT:  .LBB1_1: ; %for.cond.preheader
+; GFX11-NEXT:    s_branch .LBB1_3
+; GFX11-NEXT:  .LBB1_1: ; %while.cond.loopexit
+; GFX11-NEXT:    ; in Loop: Header=BB1_3 Depth=1
+; GFX11-NEXT:    s_add_i32 s0, s1, -1
+; GFX11-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX11-NEXT:    s_cbranch_scc1 .LBB1_5
+; GFX11-NEXT:  ; %bb.2: ; in Loop: Header=BB1_3 Depth=1
+; GFX11-NEXT:    s_mov_b32 s1, s0
+; GFX11-NEXT:  .LBB1_3: ; %for.cond.preheader
 ; GFX11-NEXT:    ; =>This Loop Header: Depth=1
-; GFX11-NEXT:    ; Child Loop BB1_2 Depth 2
+; GFX11-NEXT:    ; Child Loop BB1_4 Depth 2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v5, v1
 ; GFX11-NEXT:    s_mov_b32 s2, 0
-; GFX11-NEXT:  .LBB1_2: ; %for.body
-; GFX11-NEXT:    ; Parent Loop BB1_1 Depth=1
+; GFX11-NEXT:  .LBB1_4: ; %for.body
+; GFX11-NEXT:    ; Parent Loop BB1_3 Depth=1
 ; GFX11-NEXT:    ; => This Inner Loop Header: Depth=2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_co_u32 v7, vcc_lo, v5, 0xffffc000
@@ -1000,14 +1012,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
 ; GFX11-NEXT:    v_add_co_u32 v3, vcc_lo, v27, v3
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v28, v4, vcc_lo
-; GFX11-NEXT:    s_cbranch_scc0 .LBB1_2
-; GFX11-NEXT:  ; %bb.3: ; %while.cond.loopexit
-; GFX11-NEXT:    ; in Loop: Header=BB1_1 Depth=1
-; GFX11-NEXT:    s_add_i32 s0, s1, -1
-; GFX11-NEXT:    s_cmp_eq_u32 s1, 0
-; GFX11-NEXT:    s_cbranch_scc1 .LBB1_5
-; GFX11-NEXT:  ; %bb.4: ; in Loop: Header=BB1_1 Depth=1
-; GFX11-NEXT:    s_mov_b32 s1, s0
+; GFX11-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GFX11-NEXT:    s_branch .LBB1_1
 ; GFX11-NEXT:  .LBB1_5: ; %while.end
 ; GFX11-NEXT:    v_add_co_u32 v0, s0, s34, v0
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index 01d72f134aacbd9..c93680b339f3d7b 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -36,9 +36,8 @@ entry:
   br i1 %0, label %bb5, label %bb.nph15
 
 bb1:                                              ; preds = %bb2.preheader, %bb1
-; CHECK: LBB1_[[BB3:.]]: @ %bb3
-; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
-; CHECK: bmi LBB1_[[BB3]]
+; CHECK: LBB1_[[BB1:.]]: @ %bb1
+; CHECK: bne LBB1_[[BB1]]
   %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
   %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
   %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
@@ -50,8 +49,10 @@ bb1:                                              ; preds = %bb2.preheader, %bb1
   br i1 %exitcond, label %bb3, label %bb1
 
 bb3:                                              ; preds = %bb1, %bb2.preheader
-; CHECK: LBB1_[[BB1:.]]: @ %bb1
-; CHECK: bne LBB1_[[BB1]]
+; CHECK: LBB1_[[BB3:.]]: @ %bb3
+; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
+; CHECK: bpl
+; CHECK-NEXT: b LBB1_[[BB3]]
   %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
   %3 = add i32 %pass.011, 1                       ; <i32> [#uses=2]
   %exitcond18 = icmp eq i32 %3, %passes           ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/loop-indexing.ll b/llvm/test/CodeGen/ARM/loop-indexing.ll
index 110342c7f3ba73e..5b353c09ccab34a 100644
--- a/llvm/test/CodeGen/ARM/loop-indexing.ll
+++ b/llvm/test/CodeGen/ARM/loop-indexing.ll
@@ -1,17 +1,17 @@
 ; RUN: llc --mtriple=thumbv7em -mattr=+fp-armv8 -O3 %s -o - | \
-; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT --check-prefix=CHECK-T2
+; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT --check-prefix=CHECK-T2 --check-prefix=CHECK-T2-NOCOMPLEX
 
 ; RUN: llc --mtriple=thumbv7em -mattr=+fp-armv8 -O3 -lsr-preferred-addressing-mode=none %s -o - | \
 ; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
 
 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -O3 %s -o - | \
-; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT --check-prefix=CHECK-T2
+; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT --check-prefix=CHECK-T2 --check-prefix=CHECK-T2-NOCOMPLEX
 
 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-preferred-addressing-mode=postindexed %s -o - | \
 ; RUN:    FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
 
 ; RUN: llc -mtriple=thumbv8m.main -mattr=+fp-armv8,+dsp -lsr-preferred-addressing-mode=preindexed %s -o - | \
-; RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-T2
+; RUN:    FileCheck %s --check-prefixes=CHECK,CHECK-T2 --check-prefix=CHECK-T2-NOCOMPLEX
 
 ; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
 ; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLED
@@ -620,10 +620,6 @@ for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.
 }
 
 ; CHECK-LABEL: mul_16x16_2d
-; CHECK: @ %for.body4.us
-
-; CHECK-DEFAULT: ldr{{.*}}, #16]!
-; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
 
 ; DISABLED-NOT: ldr{{.*}}]!
 ; DISABLED-NOT: str{{.*}}]!
@@ -632,6 +628,10 @@ for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.
 ; CHECK-T2: ldrsh{{.*}}, #2]!
 ; CHECK-T2: ldr{{.*}}, #4]!
 
+; CHECK: @ %for.body4.us
+; CHECK-DEFAULT: ldr{{.*}}, #16]!
+; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
+
 define void @mul_16x16_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture readonly %C, i32 %N, i32 %M) {
 entry:
   %cmp24 = icmp eq i32 %N, 0
@@ -849,9 +849,12 @@ for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.
 }
 
 ; CHECK-LABEL: mac_16x16_2d
-; CHECK: @ %for.body4.us
+
+; CHECK-T2-NOCOMPLEX: @ %for.body4.us.epil
+; CHECK-T2-NOCOMPLEX: ldrsh{{.*}}, #2]!
 
 ; TODO: pre-indexed loads for both input arrays.
+; CHECK: @ %for.body4.us
 ; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
 ; CHECK-DEFAULT-NOT: ldr{{.*}}]!
 
@@ -860,8 +863,8 @@ for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.
 
 ; DISABLED-NOT: ldr{{.*}}]!
 
-; CHECK-T2: @ %for.body4.us.epil
-; CHECK-T2: ldrsh{{.*}}, #2]!
+; CHECK-COMPLEX: @ %for.body4.us.epil
+; CHECK-COMPLEX: ldrsh{{.*}}, #2]!
 
 define void @mac_16x16_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N, i32 %M) {
 entry:
diff --git a/llvm/test/CodeGen/AVR/rot.ll b/llvm/test/CodeGen/AVR/rot.ll
index 1a6b917af95b7aa..fd99d02bd23787d 100644
--- a/llvm/test/CodeGen/AVR/rot.ll
+++ b/llvm/test/CodeGen/AVR/rot.ll
@@ -6,27 +6,27 @@ define i8 @rol8(i8 %val, i8 %amt) {
 ; CHECK-LABEL: rol8:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    andi r22, 7
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB0_2
-; CHECK-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    rjmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: ; in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    lsl r24
 ; CHECK-NEXT:    adc r24, r1
+; CHECK-NEXT:  .LBB0_2: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB0_1
-; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    ret
 ;
 ; TINY-LABEL: rol8:
 ; TINY:       ; %bb.0:
 ; TINY-NEXT:    andi r22, 7
-; TINY-NEXT:    dec r22
-; TINY-NEXT:    brmi .LBB0_2
-; TINY-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; TINY-NEXT:    rjmp .LBB0_2
+; TINY-NEXT:  .LBB0_1: ; in Loop: Header=BB0_2 Depth=1
 ; TINY-NEXT:    lsl r24
 ; TINY-NEXT:    adc r24, r17
+; TINY-NEXT:  .LBB0_2: ; =>This Inner Loop Header: Depth=1
 ; TINY-NEXT:    dec r22
 ; TINY-NEXT:    brpl .LBB0_1
-; TINY-NEXT:  .LBB0_2:
+; TINY-NEXT:  ; %bb.3:
 ; TINY-NEXT:    ret
   %mod = urem i8 %amt, 8
   %inv = sub i8 8, %mod
@@ -41,29 +41,29 @@ define i8 @ror8(i8 %val, i8 %amt) {
 ; CHECK-LABEL: ror8:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    andi r22, 7
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB1_2
-; CHECK-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    rjmp .LBB1_2
+; CHECK-NEXT:  .LBB1_1: ; in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    bst r24, 0
 ; CHECK-NEXT:    ror r24
 ; CHECK-NEXT:    bld r24, 7
+; CHECK-NEXT:  .LBB1_2: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB1_1
-; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    ret
 ;
 ; TINY-LABEL: ror8:
 ; TINY:       ; %bb.0:
 ; TINY-NEXT:    andi r22, 7
-; TINY-NEXT:    dec r22
-; TINY-NEXT:    brmi .LBB1_2
-; TINY-NEXT:  .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; TINY-NEXT:    rjmp .LBB1_2
+; TINY-NEXT:  .LBB1_1: ; in Loop: Header=BB1_2 Depth=1
 ; TINY-NEXT:    bst r24, 0
 ; TINY-NEXT:    ror r24
 ; TINY-NEXT:    bld r24, 7
+; TINY-NEXT:  .LBB1_2: ; =>This Inner Loop Header: Depth=1
 ; TINY-NEXT:    dec r22
 ; TINY-NEXT:    brpl .LBB1_1
-; TINY-NEXT:  .LBB1_2:
+; TINY-NEXT:  ; %bb.3:
 ; TINY-NEXT:    ret
   %mod = urem i8 %amt, 8
   %inv = sub i8 8, %mod
diff --git a/llvm/test/CodeGen/AVR/rotate.ll b/llvm/test/CodeGen/AVR/rotate.ll
index 79ff7928b3a3984..38fbd3f5f0ee90a 100644
--- a/llvm/test/CodeGen/AVR/rotate.ll
+++ b/llvm/test/CodeGen/AVR/rotate.ll
@@ -53,15 +53,16 @@ define i8 @rotl8_dyn(i8 %x, i8 %y) {
 ; CHECK-LABEL: rotl8_dyn:
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    andi r22, 7
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB4_2
+; CHECK-NEXT:    rjmp .LBB4_2
 ; CHECK-NEXT:  .LBB4_1: ; %start
-; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ; in Loop: Header=BB4_2 Depth=1
 ; CHECK-NEXT:    lsl r24
 ; CHECK-NEXT:    adc r24, r1
+; CHECK-NEXT:  .LBB4_2: ; %start
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB4_1
-; CHECK-NEXT:  .LBB4_2: ; %start
+; CHECK-NEXT:  ; %bb.3: ; %start
 ; CHECK-NEXT:    ret
 start:
   %0 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %y)
@@ -120,16 +121,17 @@ define i8 @rotr8_dyn(i8 %x, i8 %y) {
 ; CHECK-LABEL: rotr8_dyn:
 ; CHECK:       ; %bb.0: ; %start
 ; CHECK-NEXT:    andi r22, 7
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB9_2
+; CHECK-NEXT:    rjmp .LBB9_2
 ; CHECK-NEXT:  .LBB9_1: ; %start
-; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ; in Loop: Header=BB9_2 Depth=1
 ; CHECK-NEXT:    bst r24, 0
 ; CHECK-NEXT:    ror r24
 ; CHECK-NEXT:    bld r24, 7
+; CHECK-NEXT:  .LBB9_2: ; %start
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB9_1
-; CHECK-NEXT:  .LBB9_2: ; %start
+; CHECK-NEXT:  ; %bb.3: ; %start
 ; CHECK-NEXT:    ret
 start:
   %0 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %y)
diff --git a/llvm/test/CodeGen/AVR/shift.ll b/llvm/test/CodeGen/AVR/shift.ll
index c0abc77c9b14ae1..e2de4f5d40be269 100644
--- a/llvm/test/CodeGen/AVR/shift.ll
+++ b/llvm/test/CodeGen/AVR/shift.ll
@@ -5,13 +5,13 @@
 define i8 @shift_i8_i8_speed(i8 %a, i8 %b) {
 ; CHECK-LABEL: shift_i8_i8_speed:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB0_2
-; CHECK-NEXT:  .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    rjmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: ; in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    lsl r24
+; CHECK-NEXT:  .LBB0_2: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB0_1
-; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    ret
   %result = shl i8 %a, %b
   ret i8 %result
@@ -36,14 +36,14 @@ define i8 @shift_i8_i8_size(i8 %a, i8 %b) optsize {
 define i16 @shift_i16_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: shift_i16_i16:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    dec r22
-; CHECK-NEXT:    brmi .LBB2_2
-; CHECK-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    rjmp .LBB2_2
+; CHECK-NEXT:  .LBB2_1: ; in Loop: Header=BB2_2 Depth=1
 ; CHECK-NEXT:    lsl r24
 ; CHECK-NEXT:    rol r25
+; CHECK-NEXT:  .LBB2_2: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    dec r22
 ; CHECK-NEXT:    brpl .LBB2_1
-; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    ret
   %result = shl i16 %a, %b
   ret i16 %result
diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
index 364eadd64c7556d..cdbf0690356b628 100644
--- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
@@ -523,7 +523,6 @@ define i32 @foo18(i32 %in) !prof !14 !section_prefix !15 {
 ; MFS-DEFAULTS-LABEL:        foo18
 ; MFS-DEFAULTS:              .section        .text.split.foo18
 ; MFS-DEFAULTS-NEXT:         foo18.cold:
-; MFS-DEFAULTS-SAME:           %common.ret
 ; MFS-DEFAULTS-X86-DAG:        jmp     qux
 ; MFS-DEFAULTS-X86-DAG:        jmp     bam
 ; MFS-DEFAULTS-AARCH64-NOT:    b       bar
diff --git a/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll b/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll
index 0e6aadf092db9aa..18144fdb1d9caaf 100644
--- a/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll
+++ b/llvm/test/CodeGen/Hexagon/lsr-postinc-nested-loop.ll
@@ -2,10 +2,10 @@
 ; Test to ensure LSR does not optimize out addrec of the outerloop.
 ; This will help to generate post-increment instructions, otherwise
 ; it end up an as extra reg+reg add inside the loop.
-; CHECK:  loop0(.LBB0_[[LOOP:.]],
-; CHECK: .LBB0_[[LOOP]]:
+; CHECK: .LBB0_[[LOOP:.]]:{{.*}}// %for.body4.us
 ; CHECK: memuh{{.*}}++
 ; CHECK: endloop
+; CHECK:  loop0(.LBB0_[[LOOP]],
 
 
 define dso_local signext i16 @foo(ptr nocapture readonly %filt, ptr nocapture readonly %inp, i32 %c1, i32 %c2) local_unnamed_addr {
diff --git a/llvm/test/CodeGen/Hexagon/prof-early-if.ll b/llvm/test/CodeGen/Hexagon/prof-early-if.ll
index 5f0c9c8f10a5891..db505ed9e1a2c30 100644
--- a/llvm/test/CodeGen/Hexagon/prof-early-if.ll
+++ b/llvm/test/CodeGen/Hexagon/prof-early-if.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O2 -march=hexagon < %s | FileCheck %s
 ; Rely on the comments generated by llc. Check that "if.then" was not predicated.
-; CHECK: b5
 ; CHECK: b2
+; CHECK: b5
 ; CHECK-NOT: if{{.*}}memd
 
 %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] }
diff --git a/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll b/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
index c356353a0264ddc..743bfbfe7e621bf 100644
--- a/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
+++ b/llvm/test/CodeGen/Hexagon/swp-multi-loops.ll
@@ -3,18 +3,18 @@
 ; Make sure we attempt to pipeline all inner most loops.
 
 ; Check if the first loop is pipelined.
-; CHECK: loop0(.LBB0_[[LOOP:.]],
-; CHECK: .LBB0_[[LOOP]]:
+; CHECK: .LBB0_[[LOOP:.]]:{{.*}}// Block address taken
 ; CHECK: add(r{{[0-9]+}},r{{[0-9]+}})
 ; CHECK-NEXT: memw(r{{[0-9]+}}++#4)
 ; CHECK-NEXT: endloop0
+; CHECK: loop0(.LBB0_[[LOOP:.]],
 
 ; Check if the second loop is pipelined.
-; CHECK: loop0(.LBB0_[[LOOP:.]],
-; CHECK: .LBB0_[[LOOP]]:
+; CHECK: .LBB0_[[LOOP:.]]:{{.*}}// Block address taken
 ; CHECK: add(r{{[0-9]+}},r{{[0-9]+}})
 ; CHECK-NEXT: memw(r{{[0-9]+}}++#4)
 ; CHECK-NEXT: endloop0
+; CHECK: loop0(.LBB0_[[LOOP:.]],
 
 define i32 @test(ptr %a, i32 %n, i32 %l) {
 entry:
diff --git a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
index b079169974d8b85..ff863a19b4918fb 100644
--- a/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
+++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/jumptables.ll
@@ -40,7 +40,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS32R2-NEXT:    addiu $sp, $sp, -16
 ; MIPS32R2-NEXT:    .cfi_def_cfa_offset 16
 ; MIPS32R2-NEXT:    sltiu $1, $4, 7
-; MIPS32R2-NEXT:    beqz $1, $BB0_3
+; MIPS32R2-NEXT:    beqz $1, $BB0_9
 ; MIPS32R2-NEXT:    sw $4, 4($sp)
 ; MIPS32R2-NEXT:  $BB0_1: # %entry
 ; MIPS32R2-NEXT:    sll $1, $4, 2
@@ -54,39 +54,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_3: # %sw.epilog
-; MIPS32R2-NEXT:    lui $1, %hi($.str.7)
-; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
-; MIPS32R2-NEXT:    j $BB0_10
-; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_4: # %sw.bb1
+; MIPS32R2-NEXT:  $BB0_3: # %sw.bb1
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.1)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.1)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_5: # %sw.bb2
+; MIPS32R2-NEXT:  $BB0_4: # %sw.bb2
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.2)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.2)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_6: # %sw.bb3
+; MIPS32R2-NEXT:  $BB0_5: # %sw.bb3
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.3)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.3)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_7: # %sw.bb4
+; MIPS32R2-NEXT:  $BB0_6: # %sw.bb4
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.4)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.4)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_8: # %sw.bb5
+; MIPS32R2-NEXT:  $BB0_7: # %sw.bb5
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.5)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.5)
 ; MIPS32R2-NEXT:    j $BB0_10
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
-; MIPS32R2-NEXT:  $BB0_9: # %sw.bb6
+; MIPS32R2-NEXT:  $BB0_8: # %sw.bb6
 ; MIPS32R2-NEXT:    lui $1, %hi($.str.6)
 ; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.6)
+; MIPS32R2-NEXT:    j $BB0_10
+; MIPS32R2-NEXT:    sw $1, 8($sp)
+; MIPS32R2-NEXT:  $BB0_9: # %sw.epilog
+; MIPS32R2-NEXT:    lui $1, %hi($.str.7)
+; MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
 ; MIPS32R2-NEXT:    sw $1, 8($sp)
 ; MIPS32R2-NEXT:  $BB0_10: # %return
 ; MIPS32R2-NEXT:    lw $2, 8($sp)
@@ -98,7 +98,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS32R6-NEXT:    addiu $sp, $sp, -16
 ; MIPS32R6-NEXT:    .cfi_def_cfa_offset 16
 ; MIPS32R6-NEXT:    sltiu $1, $4, 7
-; MIPS32R6-NEXT:    beqz $1, $BB0_3
+; MIPS32R6-NEXT:    beqz $1, $BB0_9
 ; MIPS32R6-NEXT:    sw $4, 4($sp)
 ; MIPS32R6-NEXT:  $BB0_1: # %entry
 ; MIPS32R6-NEXT:    sll $1, $4, 2
@@ -112,39 +112,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_3: # %sw.epilog
-; MIPS32R6-NEXT:    lui $1, %hi($.str.7)
-; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
-; MIPS32R6-NEXT:    j $BB0_10
-; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_4: # %sw.bb1
+; MIPS32R6-NEXT:  $BB0_3: # %sw.bb1
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.1)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.1)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_5: # %sw.bb2
+; MIPS32R6-NEXT:  $BB0_4: # %sw.bb2
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.2)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.2)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_6: # %sw.bb3
+; MIPS32R6-NEXT:  $BB0_5: # %sw.bb3
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.3)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.3)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_7: # %sw.bb4
+; MIPS32R6-NEXT:  $BB0_6: # %sw.bb4
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.4)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.4)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_8: # %sw.bb5
+; MIPS32R6-NEXT:  $BB0_7: # %sw.bb5
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.5)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.5)
 ; MIPS32R6-NEXT:    j $BB0_10
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
-; MIPS32R6-NEXT:  $BB0_9: # %sw.bb6
+; MIPS32R6-NEXT:  $BB0_8: # %sw.bb6
 ; MIPS32R6-NEXT:    lui $1, %hi($.str.6)
 ; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.6)
+; MIPS32R6-NEXT:    j $BB0_10
+; MIPS32R6-NEXT:    sw $1, 8($sp)
+; MIPS32R6-NEXT:  $BB0_9: # %sw.epilog
+; MIPS32R6-NEXT:    lui $1, %hi($.str.7)
+; MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
 ; MIPS32R6-NEXT:    sw $1, 8($sp)
 ; MIPS32R6-NEXT:  $BB0_10: # %return
 ; MIPS32R6-NEXT:    lw $2, 8($sp)
@@ -157,7 +157,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    .cfi_def_cfa_offset 16
 ; MIPS64R2-NEXT:    dext $2, $4, 0, 32
 ; MIPS64R2-NEXT:    sltiu $1, $2, 7
-; MIPS64R2-NEXT:    beqz $1, .LBB0_3
+; MIPS64R2-NEXT:    beqz $1, .LBB0_9
 ; MIPS64R2-NEXT:    sw $4, 4($sp)
 ; MIPS64R2-NEXT:  .LBB0_1: # %entry
 ; MIPS64R2-NEXT:    dsll $1, $2, 3
@@ -179,16 +179,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_3: # %sw.epilog
-; MIPS64R2-NEXT:    lui $1, %highest(.L.str.7)
-; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.7)
-; MIPS64R2-NEXT:    dsll $1, $1, 16
-; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.7)
-; MIPS64R2-NEXT:    dsll $1, $1, 16
-; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.7)
-; MIPS64R2-NEXT:    j .LBB0_10
-; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_4: # %sw.bb1
+; MIPS64R2-NEXT:  .LBB0_3: # %sw.bb1
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.1)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.1)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
@@ -197,7 +188,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.1)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_5: # %sw.bb2
+; MIPS64R2-NEXT:  .LBB0_4: # %sw.bb2
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.2)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.2)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
@@ -206,7 +197,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.2)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_6: # %sw.bb3
+; MIPS64R2-NEXT:  .LBB0_5: # %sw.bb3
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.3)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.3)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
@@ -215,7 +206,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.3)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_7: # %sw.bb4
+; MIPS64R2-NEXT:  .LBB0_6: # %sw.bb4
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.4)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.4)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
@@ -224,7 +215,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.4)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_8: # %sw.bb5
+; MIPS64R2-NEXT:  .LBB0_7: # %sw.bb5
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.5)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.5)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
@@ -233,13 +224,22 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.5)
 ; MIPS64R2-NEXT:    j .LBB0_10
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
-; MIPS64R2-NEXT:  .LBB0_9: # %sw.bb6
+; MIPS64R2-NEXT:  .LBB0_8: # %sw.bb6
 ; MIPS64R2-NEXT:    lui $1, %highest(.L.str.6)
 ; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.6)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
 ; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.6)
 ; MIPS64R2-NEXT:    dsll $1, $1, 16
 ; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R2-NEXT:    j .LBB0_10
+; MIPS64R2-NEXT:    sd $1, 8($sp)
+; MIPS64R2-NEXT:  .LBB0_9: # %sw.epilog
+; MIPS64R2-NEXT:    lui $1, %highest(.L.str.7)
+; MIPS64R2-NEXT:    daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R2-NEXT:    dsll $1, $1, 16
+; MIPS64R2-NEXT:    daddiu $1, $1, %lo(.L.str.7)
 ; MIPS64R2-NEXT:    sd $1, 8($sp)
 ; MIPS64R2-NEXT:  .LBB0_10: # %return
 ; MIPS64R2-NEXT:    ld $2, 8($sp)
@@ -252,7 +252,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    .cfi_def_cfa_offset 16
 ; MIPS64R6-NEXT:    dext $2, $4, 0, 32
 ; MIPS64R6-NEXT:    sltiu $1, $2, 7
-; MIPS64R6-NEXT:    beqz $1, .LBB0_3
+; MIPS64R6-NEXT:    beqz $1, .LBB0_9
 ; MIPS64R6-NEXT:    sw $4, 4($sp)
 ; MIPS64R6-NEXT:  .LBB0_1: # %entry
 ; MIPS64R6-NEXT:    dsll $1, $2, 3
@@ -274,16 +274,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_3: # %sw.epilog
-; MIPS64R6-NEXT:    lui $1, %highest(.L.str.7)
-; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.7)
-; MIPS64R6-NEXT:    dsll $1, $1, 16
-; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.7)
-; MIPS64R6-NEXT:    dsll $1, $1, 16
-; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.7)
-; MIPS64R6-NEXT:    j .LBB0_10
-; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_4: # %sw.bb1
+; MIPS64R6-NEXT:  .LBB0_3: # %sw.bb1
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.1)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.1)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
@@ -292,7 +283,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.1)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_5: # %sw.bb2
+; MIPS64R6-NEXT:  .LBB0_4: # %sw.bb2
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.2)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.2)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
@@ -301,7 +292,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.2)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_6: # %sw.bb3
+; MIPS64R6-NEXT:  .LBB0_5: # %sw.bb3
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.3)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.3)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
@@ -310,7 +301,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.3)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_7: # %sw.bb4
+; MIPS64R6-NEXT:  .LBB0_6: # %sw.bb4
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.4)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.4)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
@@ -319,7 +310,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.4)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_8: # %sw.bb5
+; MIPS64R6-NEXT:  .LBB0_7: # %sw.bb5
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.5)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.5)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
@@ -328,13 +319,22 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.5)
 ; MIPS64R6-NEXT:    j .LBB0_10
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
-; MIPS64R6-NEXT:  .LBB0_9: # %sw.bb6
+; MIPS64R6-NEXT:  .LBB0_8: # %sw.bb6
 ; MIPS64R6-NEXT:    lui $1, %highest(.L.str.6)
 ; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.6)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
 ; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.6)
 ; MIPS64R6-NEXT:    dsll $1, $1, 16
 ; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.6)
+; MIPS64R6-NEXT:    j .LBB0_10
+; MIPS64R6-NEXT:    sd $1, 8($sp)
+; MIPS64R6-NEXT:  .LBB0_9: # %sw.epilog
+; MIPS64R6-NEXT:    lui $1, %highest(.L.str.7)
+; MIPS64R6-NEXT:    daddiu $1, $1, %higher(.L.str.7)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %hi(.L.str.7)
+; MIPS64R6-NEXT:    dsll $1, $1, 16
+; MIPS64R6-NEXT:    daddiu $1, $1, %lo(.L.str.7)
 ; MIPS64R6-NEXT:    sd $1, 8($sp)
 ; MIPS64R6-NEXT:  .LBB0_10: # %return
 ; MIPS64R6-NEXT:    ld $2, 8($sp)
@@ -349,7 +349,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS32R2-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-MIPS32R2-NEXT:    addu $2, $2, $25
 ; PIC-MIPS32R2-NEXT:    sltiu $1, $4, 7
-; PIC-MIPS32R2-NEXT:    beqz $1, $BB0_3
+; PIC-MIPS32R2-NEXT:    beqz $1, $BB0_9
 ; PIC-MIPS32R2-NEXT:    sw $4, 4($sp)
 ; PIC-MIPS32R2-NEXT:  $BB0_1: # %entry
 ; PIC-MIPS32R2-NEXT:    sll $1, $4, 2
@@ -364,39 +364,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_3: # %sw.epilog
-; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.7)($2)
-; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
-; PIC-MIPS32R2-NEXT:    b $BB0_10
-; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_4: # %sw.bb1
+; PIC-MIPS32R2-NEXT:  $BB0_3: # %sw.bb1
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.1)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.1)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_5: # %sw.bb2
+; PIC-MIPS32R2-NEXT:  $BB0_4: # %sw.bb2
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.2)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.2)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_6: # %sw.bb3
+; PIC-MIPS32R2-NEXT:  $BB0_5: # %sw.bb3
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.3)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.3)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_7: # %sw.bb4
+; PIC-MIPS32R2-NEXT:  $BB0_6: # %sw.bb4
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.4)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.4)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_8: # %sw.bb5
+; PIC-MIPS32R2-NEXT:  $BB0_7: # %sw.bb5
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.5)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.5)
 ; PIC-MIPS32R2-NEXT:    b $BB0_10
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R2-NEXT:  $BB0_9: # %sw.bb6
+; PIC-MIPS32R2-NEXT:  $BB0_8: # %sw.bb6
 ; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.6)($2)
 ; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R2-NEXT:    b $BB0_10
+; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R2-NEXT:  $BB0_9: # %sw.epilog
+; PIC-MIPS32R2-NEXT:    lw $1, %got($.str.7)($2)
+; PIC-MIPS32R2-NEXT:    addiu $1, $1, %lo($.str.7)
 ; PIC-MIPS32R2-NEXT:    sw $1, 8($sp)
 ; PIC-MIPS32R2-NEXT:  $BB0_10: # %return
 ; PIC-MIPS32R2-NEXT:    lw $2, 8($sp)
@@ -411,7 +411,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS32R6-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-MIPS32R6-NEXT:    addu $2, $2, $25
 ; PIC-MIPS32R6-NEXT:    sltiu $1, $4, 7
-; PIC-MIPS32R6-NEXT:    beqz $1, $BB0_3
+; PIC-MIPS32R6-NEXT:    beqz $1, $BB0_9
 ; PIC-MIPS32R6-NEXT:    sw $4, 4($sp)
 ; PIC-MIPS32R6-NEXT:  $BB0_1: # %entry
 ; PIC-MIPS32R6-NEXT:    sll $1, $4, 2
@@ -426,39 +426,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_3: # %sw.epilog
-; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.7)($2)
-; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
-; PIC-MIPS32R6-NEXT:    b $BB0_10
-; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_4: # %sw.bb1
+; PIC-MIPS32R6-NEXT:  $BB0_3: # %sw.bb1
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.1)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.1)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_5: # %sw.bb2
+; PIC-MIPS32R6-NEXT:  $BB0_4: # %sw.bb2
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.2)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.2)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_6: # %sw.bb3
+; PIC-MIPS32R6-NEXT:  $BB0_5: # %sw.bb3
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.3)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.3)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_7: # %sw.bb4
+; PIC-MIPS32R6-NEXT:  $BB0_6: # %sw.bb4
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.4)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.4)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_8: # %sw.bb5
+; PIC-MIPS32R6-NEXT:  $BB0_7: # %sw.bb5
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.5)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.5)
 ; PIC-MIPS32R6-NEXT:    b $BB0_10
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
-; PIC-MIPS32R6-NEXT:  $BB0_9: # %sw.bb6
+; PIC-MIPS32R6-NEXT:  $BB0_8: # %sw.bb6
 ; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.6)($2)
 ; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.6)
+; PIC-MIPS32R6-NEXT:    b $BB0_10
+; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
+; PIC-MIPS32R6-NEXT:  $BB0_9: # %sw.epilog
+; PIC-MIPS32R6-NEXT:    lw $1, %got($.str.7)($2)
+; PIC-MIPS32R6-NEXT:    addiu $1, $1, %lo($.str.7)
 ; PIC-MIPS32R6-NEXT:    sw $1, 8($sp)
 ; PIC-MIPS32R6-NEXT:  $BB0_10: # %return
 ; PIC-MIPS32R6-NEXT:    lw $2, 8($sp)
@@ -474,7 +474,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS64R2-NEXT:    daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
 ; PIC-MIPS64R2-NEXT:    dext $3, $4, 0, 32
 ; PIC-MIPS64R2-NEXT:    sltiu $1, $3, 7
-; PIC-MIPS64R2-NEXT:    beqz $1, .LBB0_3
+; PIC-MIPS64R2-NEXT:    beqz $1, .LBB0_9
 ; PIC-MIPS64R2-NEXT:    sw $4, 4($sp)
 ; PIC-MIPS64R2-NEXT:  .LBB0_1: # %entry
 ; PIC-MIPS64R2-NEXT:    dsll $1, $3, 3
@@ -489,39 +489,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_3: # %sw.epilog
-; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.7)($2)
-; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
-; PIC-MIPS64R2-NEXT:    b .LBB0_10
-; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_4: # %sw.bb1
+; PIC-MIPS64R2-NEXT:  .LBB0_3: # %sw.bb1
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.1)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.1)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_5: # %sw.bb2
+; PIC-MIPS64R2-NEXT:  .LBB0_4: # %sw.bb2
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.2)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.2)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_6: # %sw.bb3
+; PIC-MIPS64R2-NEXT:  .LBB0_5: # %sw.bb3
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.3)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.3)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_7: # %sw.bb4
+; PIC-MIPS64R2-NEXT:  .LBB0_6: # %sw.bb4
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.4)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.4)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_8: # %sw.bb5
+; PIC-MIPS64R2-NEXT:  .LBB0_7: # %sw.bb5
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.5)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.5)
 ; PIC-MIPS64R2-NEXT:    b .LBB0_10
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R2-NEXT:  .LBB0_9: # %sw.bb6
+; PIC-MIPS64R2-NEXT:  .LBB0_8: # %sw.bb6
 ; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.6)($2)
 ; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R2-NEXT:    b .LBB0_10
+; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R2-NEXT:  .LBB0_9: # %sw.epilog
+; PIC-MIPS64R2-NEXT:    ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R2-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
 ; PIC-MIPS64R2-NEXT:    sd $1, 8($sp)
 ; PIC-MIPS64R2-NEXT:  .LBB0_10: # %return
 ; PIC-MIPS64R2-NEXT:    ld $2, 8($sp)
@@ -537,7 +537,7 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS64R6-NEXT:    daddiu $2, $1, %lo(%neg(%gp_rel(_Z3fooi)))
 ; PIC-MIPS64R6-NEXT:    dext $3, $4, 0, 32
 ; PIC-MIPS64R6-NEXT:    sltiu $1, $3, 7
-; PIC-MIPS64R6-NEXT:    beqz $1, .LBB0_3
+; PIC-MIPS64R6-NEXT:    beqz $1, .LBB0_9
 ; PIC-MIPS64R6-NEXT:    sw $4, 4($sp)
 ; PIC-MIPS64R6-NEXT:  .LBB0_1: # %entry
 ; PIC-MIPS64R6-NEXT:    dsll $1, $3, 3
@@ -552,39 +552,39 @@ define ptr @_Z3fooi(i32 signext %Letter) {
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_3: # %sw.epilog
-; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.7)($2)
-; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
-; PIC-MIPS64R6-NEXT:    b .LBB0_10
-; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_4: # %sw.bb1
+; PIC-MIPS64R6-NEXT:  .LBB0_3: # %sw.bb1
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.1)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.1)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_5: # %sw.bb2
+; PIC-MIPS64R6-NEXT:  .LBB0_4: # %sw.bb2
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.2)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.2)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_6: # %sw.bb3
+; PIC-MIPS64R6-NEXT:  .LBB0_5: # %sw.bb3
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.3)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.3)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_7: # %sw.bb4
+; PIC-MIPS64R6-NEXT:  .LBB0_6: # %sw.bb4
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.4)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.4)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_8: # %sw.bb5
+; PIC-MIPS64R6-NEXT:  .LBB0_7: # %sw.bb5
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.5)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.5)
 ; PIC-MIPS64R6-NEXT:    b .LBB0_10
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
-; PIC-MIPS64R6-NEXT:  .LBB0_9: # %sw.bb6
+; PIC-MIPS64R6-NEXT:  .LBB0_8: # %sw.bb6
 ; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.6)($2)
 ; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.6)
+; PIC-MIPS64R6-NEXT:    b .LBB0_10
+; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
+; PIC-MIPS64R6-NEXT:  .LBB0_9: # %sw.epilog
+; PIC-MIPS64R6-NEXT:    ld $1, %got_page(.L.str.7)($2)
+; PIC-MIPS64R6-NEXT:    daddiu $1, $1, %got_ofst(.L.str.7)
 ; PIC-MIPS64R6-NEXT:    sd $1, 8($sp)
 ; PIC-MIPS64R6-NEXT:  .LBB0_10: # %return
 ; PIC-MIPS64R6-NEXT:    ld $2, 8($sp)
diff --git a/llvm/test/CodeGen/Mips/jump-table-mul.ll b/llvm/test/CodeGen/Mips/jump-table-mul.ll
index ef7452cf253fee6..22f41f53d154bf2 100644
--- a/llvm/test/CodeGen/Mips/jump-table-mul.ll
+++ b/llvm/test/CodeGen/Mips/jump-table-mul.ll
@@ -8,15 +8,11 @@ define i64 @test(i64 %arg) {
 ; CHECK-NEXT:    lui $1, %hi(%neg(%gp_rel(test)))
 ; CHECK-NEXT:    daddu $2, $1, $25
 ; CHECK-NEXT:    sltiu $1, $4, 11
-; CHECK-NEXT:    beqz $1, .LBB0_3
+; CHECK-NEXT:    beqz $1, .LBB0_4
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  .LBB0_1: # %entry
 ; CHECK-NEXT:    daddiu $1, $2, %lo(%neg(%gp_rel(test)))
 ; CHECK-NEXT:    dsll $2, $4, 3
-; Previously this dsll was the following sequence:
-;	daddiu	$2, $zero, 8
-;	dmult	$4, $2
-;	mflo	$2
 ; CHECK-NEXT:    ld $3, %got_page(.LJTI0_0)($1)
 ; CHECK-NEXT:    daddu $2, $2, $3
 ; CHECK-NEXT:    ld $2, %got_ofst(.LJTI0_0)($2)
@@ -26,12 +22,16 @@ define i64 @test(i64 %arg) {
 ; CHECK-NEXT:  .LBB0_2: # %sw.bb
 ; CHECK-NEXT:    jr $ra
 ; CHECK-NEXT:    daddiu $2, $zero, 1
-; CHECK-NEXT:  .LBB0_3: # %default
-; CHECK-NEXT:    jr $ra
-; CHECK-NEXT:    daddiu $2, $zero, 1234
-; CHECK-NEXT:  .LBB0_4: # %sw.bb1
+; CHECK-NEXT:  .LBB0_3: # %sw.bb1
 ; CHECK-NEXT:    jr $ra
 ; CHECK-NEXT:    daddiu $2, $zero, 0
+; CHECK-NEXT:  .LBB0_4: # %default
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    daddiu $2, $zero, 1234
+; Previously this dsll was the following sequence:
+;	daddiu	$2, $zero, 8
+;	dmult	$4, $2
+;	mflo	$2
 entry:
   switch i64 %arg, label %default [
     i64 0, label %sw.bb
@@ -54,13 +54,13 @@ sw.bb1:
 ; CHECK-NEXT: 	.p2align	3
 ; CHECK-LABEL: .LJTI0_0:
 ; CHECK-NEXT: 	.gpdword	.LBB0_2
-; CHECK-NEXT: 	.gpdword	.LBB0_3
-; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_4
+; CHECK-NEXT: 	.gpdword	.LBB0_4
 ; CHECK-NEXT: 	.gpdword	.LBB0_2
-; CHECK-NEXT: 	.gpdword	.LBB0_3
+; CHECK-NEXT: 	.gpdword	.LBB0_4
 ; CHECK-NEXT: 	.gpdword	.LBB0_2
-; CHECK-NEXT: 	.gpdword	.LBB0_3
-; CHECK-NEXT: 	.gpdword	.LBB0_3
-; CHECK-NEXT: 	.gpdword	.LBB0_3
-; CHECK-NEXT: 	.gpdword	.LBB0_3
 ; CHECK-NEXT: 	.gpdword	.LBB0_4
+; CHECK-NEXT: 	.gpdword	.LBB0_4
+; CHECK-NEXT: 	.gpdword	.LBB0_4
+; CHECK-NEXT: 	.gpdword	.LBB0_4
+; CHECK-NEXT: 	.gpdword	.LBB0_3
diff --git a/llvm/test/CodeGen/Mips/nacl-align.ll b/llvm/test/CodeGen/Mips/nacl-align.ll
index bca6c93de2624d3..964ddfc963c4ec3 100644
--- a/llvm/test/CodeGen/Mips/nacl-align.ll
+++ b/llvm/test/CodeGen/Mips/nacl-align.ll
@@ -44,9 +44,6 @@ default:
 ; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
 ; CHECK-NEXT:        jr      $ra
 ; CHECK-NEXT:        addiu   $2, $zero, 111
-; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
-; CHECK-NEXT:        jr      $ra
-; CHECK-NEXT:        addiu   $2, $zero, 555
 ; CHECK-NEXT:        .p2align  4
 ; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
 ; CHECK-NEXT:        jr      $ra
@@ -55,6 +52,13 @@ default:
 ; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
 ; CHECK-NEXT:        jr      $ra
 ; CHECK-NEXT:        addiu   $2, $zero, 333
+; CHECK-NEXT:        .p2align  4
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 444
+; CHECK-NEXT:    ${{BB[0-9]+_[0-9]+}}:
+; CHECK-NEXT:        jr      $ra
+; CHECK-NEXT:        addiu   $2, $zero, 555
 
 }
 
diff --git a/llvm/test/CodeGen/Mips/pseudo-jump-fill.ll b/llvm/test/CodeGen/Mips/pseudo-jump-fill.ll
index 31f077d57a93355..afb79e55f4f90b8 100644
--- a/llvm/test/CodeGen/Mips/pseudo-jump-fill.ll
+++ b/llvm/test/CodeGen/Mips/pseudo-jump-fill.ll
@@ -12,7 +12,7 @@ define i32 @test(i32 signext %x, i32 signext %c) {
 ; CHECK-NEXT:    addiu $2, $2, %lo(_gp_disp)
 ; CHECK-NEXT:    addiur2 $5, $5, -1
 ; CHECK-NEXT:    sltiu $1, $5, 4
-; CHECK-NEXT:    beqz $1, $BB0_3
+; CHECK-NEXT:    beqz $1, $BB0_6
 ; CHECK-NEXT:    addu $3, $2, $25
 ; CHECK-NEXT:  $BB0_1: # %entry
 ; CHECK-NEXT:    li16 $2, 0
@@ -26,17 +26,17 @@ define i32 @test(i32 signext %x, i32 signext %c) {
 ; CHECK-NEXT:  $BB0_2: # %sw.bb2
 ; CHECK-NEXT:    addiur2 $2, $4, 1
 ; CHECK-NEXT:    jrc $ra
-; CHECK-NEXT:  $BB0_3:
-; CHECK-NEXT:    move $2, $4
-; CHECK-NEXT:    jrc $ra
-; CHECK-NEXT:  $BB0_4: # %sw.bb3
+; CHECK-NEXT:  $BB0_3: # %sw.bb3
 ; CHECK-NEXT:    addius5 $4, 2
 ; CHECK-NEXT:    move $2, $4
 ; CHECK-NEXT:    jrc $ra
-; CHECK-NEXT:  $BB0_5: # %sw.bb5
+; CHECK-NEXT:  $BB0_4: # %sw.bb5
 ; CHECK-NEXT:    addius5 $4, 3
 ; CHECK-NEXT:    move $2, $4
-; CHECK-NEXT:  $BB0_6: # %for.cond.cleanup
+; CHECK-NEXT:  $BB0_5: # %for.cond.cleanup
+; CHECK-NEXT:    jrc $ra
+; CHECK-NEXT:  $BB0_6:
+; CHECK-NEXT:    move $2, $4
 ; CHECK-NEXT:    jrc $ra
 entry:
   switch i32 %c, label %sw.epilog [
diff --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index db4e22fd8d17afc..e76e6b1e80fbaa7 100644
--- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -42,9 +42,16 @@ define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality ptr @__
 ; CHECK-NEXT:  .Ltmp1:
 ; CHECK-NEXT:  # %bb.1: # %bb30.preheader
 ; CHECK-NEXT:    addi 28, 31, 120
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_2: # %invcont23
+; CHECK-NEXT:    #
+; CHECK-NEXT:    ld 3, 128(31)
+; CHECK-NEXT:    sub 30, 30, 3
+; CHECK-NEXT:  .LBB0_3: # %bb30
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    cmpldi 30, 0
-; CHECK-NEXT:    beq 0, .LBB0_4
-; CHECK-NEXT:  .LBB0_2: # %cond_true
+; CHECK-NEXT:    beq 0, .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %cond_true
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:  .Ltmp3:
 ; CHECK-NEXT:    mr 3, 29
@@ -52,13 +59,8 @@ define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality ptr @__
 ; CHECK-NEXT:    bl Bar
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:  .Ltmp4:
-; CHECK-NEXT:  # %bb.3: # %invcont23
-; CHECK-NEXT:    #
-; CHECK-NEXT:    ld 3, 128(31)
-; CHECK-NEXT:    sub 30, 30, 3
-; CHECK-NEXT:    cmpldi 30, 0
-; CHECK-NEXT:    bne 0, .LBB0_2
-; CHECK-NEXT:  .LBB0_4: # %cleanup
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:  .LBB0_5: # %cleanup
 ; CHECK-NEXT:    ld 30, 160(31) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 29, 152(31) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 28, 144(31) # 8-byte Folded Reload
@@ -68,12 +70,12 @@ define void @Bork(i64 %range.0.0, i64 %range.0.1, i64 %size) personality ptr @__
 ; CHECK-NEXT:    ld 31, -8(1)
 ; CHECK-NEXT:    mtlr 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB0_5: # %unwind.loopexit.split-lp
+; CHECK-NEXT:  .LBB0_6: # %unwind.loopexit.split-lp
 ; CHECK-NEXT:  .Ltmp2:
-; CHECK-NEXT:    b .LBB0_7
-; CHECK-NEXT:  .LBB0_6: # %unwind.loopexit
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %unwind.loopexit
 ; CHECK-NEXT:  .Ltmp5:
-; CHECK-NEXT:  .LBB0_7: # %unwind
+; CHECK-NEXT:  .LBB0_8: # %unwind
 ; CHECK-NEXT:    ld 4, 0(1)
 ; CHECK-NEXT:    mr 1, 27
 ; CHECK-NEXT:    std 4, 0(1)
diff --git a/llvm/test/CodeGen/PowerPC/branch-opt.ll b/llvm/test/CodeGen/PowerPC/branch-opt.ll
index 5e31270840b80d9..95cfd52036e47f4 100644
--- a/llvm/test/CodeGen/PowerPC/branch-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/branch-opt.ll
@@ -11,10 +11,11 @@ target triple = "powerpc-unknown-linux-gnu"
 ; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
 ; of %cond_next48, so there should only be two unconditional branches.
 
-;CHECK: b .LBB0_13
-;CHECK: b .LBB0_13
-;CHECK-NOT: b .LBB0_13
-;CHECK: .LBB0_13: # %cond_next48
+;CHECK: b .LBB0_14
+;CHECK: b .LBB0_14
+;CHECK: b .LBB0_14
+;CHECK-NOT: b .LBB0_14
+;CHECK: .LBB0_14: # %cond_next48
 
 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
index ccc9adbc2bdd1dd..efb356b7ed817f1 100644
--- a/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
+++ b/llvm/test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
@@ -11,7 +11,7 @@ define dso_local zeroext i32 @test(i32 signext %l) nounwind {
 ; CHECK-NEXT:    addi r3, r3, -1
 ; CHECK-NEXT:    std r0, 48(r1)
 ; CHECK-NEXT:    cmplwi r3, 5
-; CHECK-NEXT:    bgt cr0, .LBB0_3
+; CHECK-NEXT:    bgt cr0, .LBB0_9
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    addis r4, r2, .LC0@toc@ha
 ; CHECK-NEXT:    rldic r3, r3, 2, 30
@@ -24,42 +24,41 @@ define dso_local zeroext i32 @test(i32 signext %l) nounwind {
 ; CHECK-NEXT:    li r3, 2
 ; CHECK-NEXT:    bl test1
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_3: # %sw.default
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    bl test1
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl test3
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_4: # %sw.bb3
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_3: # %sw.bb3
 ; CHECK-NEXT:    li r3, 3
-; CHECK-NEXT:    b .LBB0_9
-; CHECK-NEXT:  .LBB0_5: # %sw.bb5
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_4: # %sw.bb5
 ; CHECK-NEXT:    li r3, 4
 ; CHECK-NEXT:    bl test2
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    bl test3
-; CHECK-NEXT:    nop
 ; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_6: # %sw.bb8
+; CHECK-NEXT:  .LBB0_5: # %sw.bb8
 ; CHECK-NEXT:    li r3, 5
 ; CHECK-NEXT:    bl test4
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_7: # %sw.bb10
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_6: # %sw.bb10
 ; CHECK-NEXT:    li r3, 66
 ; CHECK-NEXT:    bl test4
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bl test1
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_8: # %sw.bb13
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_7: # %sw.bb13
 ; CHECK-NEXT:    li r3, 66
-; CHECK-NEXT:  .LBB0_9: # %return
+; CHECK-NEXT:  .LBB0_8: # %return
 ; CHECK-NEXT:    bl test2
 ; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_9: # %sw.default
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
 ; CHECK-NEXT:  .LBB0_10: # %return
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_11: # %return
 ; CHECK-NEXT:    clrldi r3, r3, 32
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
index 7b531087501923a..58c15f31c218361 100644
--- a/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-tocReg.ll
@@ -73,20 +73,20 @@ define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
 ; CHECKLX-NEXT:    ld 3, .LC0@toc@l(3)
 ; CHECKLX-NEXT:    ld 5, .LC1@toc@l(4)
 ; CHECKLX-NEXT:    lwz 6, 0(3)
+; CHECKLX-NEXT:    b .LBB0_2
 ; CHECKLX-NEXT:    .p2align 5
 ; CHECKLX-NEXT:  .LBB0_1: # %if.end
 ; CHECKLX-NEXT:    #
-; CHECKLX-NEXT:    lwz 7, 0(5)
-; CHECKLX-NEXT:    lwz 4, 0(3)
-; CHECKLX-NEXT:    cmpw 6, 7
-; CHECKLX-NEXT:    bgt 0, .LBB0_3
-; CHECKLX-NEXT:  # %bb.2: # %if.end
-; CHECKLX-NEXT:    #
 ; CHECKLX-NEXT:    addi 4, 4, 1
 ; CHECKLX-NEXT:    stw 4, 0(3)
 ; CHECKLX-NEXT:    lwz 6, 0(3)
-; CHECKLX-NEXT:    b .LBB0_1
-; CHECKLX-NEXT:  .LBB0_3: # %if.then
+; CHECKLX-NEXT:  .LBB0_2: # %if.end
+; CHECKLX-NEXT:    #
+; CHECKLX-NEXT:    lwz 7, 0(5)
+; CHECKLX-NEXT:    lwz 4, 0(3)
+; CHECKLX-NEXT:    cmpw 6, 7
+; CHECKLX-NEXT:    ble 0, .LBB0_1
+; CHECKLX-NEXT:  # %bb.3: # %if.then
 ; CHECKLX-NEXT:    mflr 0
 ; CHECKLX-NEXT:    stdu 1, -32(1)
 ; CHECKLX-NEXT:    std 2, 24(1)
@@ -106,19 +106,19 @@ define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
 ; CHECKAIX:       # %bb.0: # %entry
 ; CHECKAIX-NEXT:    ld 5, L..C0(2) # @ga
 ; CHECKAIX-NEXT:    ld 6, L..C1(2) # @gb
+; CHECKAIX-NEXT:    b L..BB0_2
 ; CHECKAIX-NEXT:  L..BB0_1: # %if.end
 ; CHECKAIX-NEXT:    #
+; CHECKAIX-NEXT:    addi 4, 4, 1
+; CHECKAIX-NEXT:    stw 4, 0(5)
+; CHECKAIX-NEXT:  L..BB0_2: # %if.end
+; CHECKAIX-NEXT:    #
 ; CHECKAIX-NEXT:    lwz 4, 0(5)
 ; CHECKAIX-NEXT:    lwz 7, 0(6)
 ; CHECKAIX-NEXT:    cmpw 4, 7
 ; CHECKAIX-NEXT:    lwz 4, 0(5)
-; CHECKAIX-NEXT:    bgt 0, L..BB0_3
-; CHECKAIX-NEXT:  # %bb.2: # %if.end
-; CHECKAIX-NEXT:    #
-; CHECKAIX-NEXT:    addi 4, 4, 1
-; CHECKAIX-NEXT:    stw 4, 0(5)
-; CHECKAIX-NEXT:    b L..BB0_1
-; CHECKAIX-NEXT:  L..BB0_3: # %if.then
+; CHECKAIX-NEXT:    ble 0, L..BB0_1
+; CHECKAIX-NEXT:  # %bb.3: # %if.then
 ; CHECKAIX-NEXT:    mflr 0
 ; CHECKAIX-NEXT:    stdu 1, -112(1)
 ; CHECKAIX-NEXT:    ld 5, 0(3)
@@ -139,19 +139,19 @@ define signext i32 @test(ptr nocapture %FP) local_unnamed_addr #0 {
 ; CHECKAIX32:       # %bb.0: # %entry
 ; CHECKAIX32-NEXT:    lwz 5, L..C0(2) # @ga
 ; CHECKAIX32-NEXT:    lwz 6, L..C1(2) # @gb
+; CHECKAIX32-NEXT:    b L..BB0_2
 ; CHECKAIX32-NEXT:  L..BB0_1: # %if.end
 ; CHECKAIX32-NEXT:    #
+; CHECKAIX32-NEXT:    addi 4, 4, 1
+; CHECKAIX32-NEXT:    stw 4, 0(5)
+; CHECKAIX32-NEXT:  L..BB0_2: # %if.end
+; CHECKAIX32-NEXT:    #
 ; CHECKAIX32-NEXT:    lwz 4, 0(5)
 ; CHECKAIX32-NEXT:    lwz 7, 0(6)
 ; CHECKAIX32-NEXT:    cmpw 4, 7
 ; CHECKAIX32-NEXT:    lwz 4, 0(5)
-; CHECKAIX32-NEXT:    bgt 0, L..BB0_3
-; CHECKAIX32-NEXT:  # %bb.2: # %if.end
-; CHECKAIX32-NEXT:    #
-; CHECKAIX32-NEXT:    addi 4, 4, 1
-; CHECKAIX32-NEXT:    stw 4, 0(5)
-; CHECKAIX32-NEXT:    b L..BB0_1
-; CHECKAIX32-NEXT:  L..BB0_3: # %if.then
+; CHECKAIX32-NEXT:    ble 0, L..BB0_1
+; CHECKAIX32-NEXT:  # %bb.3: # %if.then
 ; CHECKAIX32-NEXT:    mflr 0
 ; CHECKAIX32-NEXT:    stwu 1, -64(1)
 ; CHECKAIX32-NEXT:    lwz 5, 0(3)
diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 79f2ef3e3746a79..432b73e4a3a53be 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -27,22 +27,9 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
 ; CHECK-NEXT:    mulld 30, 8, 30
 ; CHECK-NEXT:    mulld 28, 8, 28
 ; CHECK-NEXT:    mulld 8, 8, 27
-; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:    b .LBB0_5
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2:
-; CHECK-NEXT:    add 5, 5, 9
-; CHECK-NEXT:    add 12, 12, 0
-; CHECK-NEXT:    add 30, 30, 0
-; CHECK-NEXT:    add 28, 28, 0
-; CHECK-NEXT:    add 8, 8, 0
-; CHECK-NEXT:    cmpd 5, 7
-; CHECK-NEXT:    bge 0, .LBB0_6
-; CHECK-NEXT:  .LBB0_3: # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB0_5 Depth 2
-; CHECK-NEXT:    sub 27, 5, 10
-; CHECK-NEXT:    cmpd 6, 27
-; CHECK-NEXT:    bge 0, .LBB0_2
-; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    add 25, 6, 12
 ; CHECK-NEXT:    add 24, 6, 8
 ; CHECK-NEXT:    sldi 26, 6, 3
@@ -58,7 +45,7 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
 ; CHECK-NEXT:    add 22, 3, 22
 ; CHECK-NEXT:    add 25, 29, 25
 ; CHECK-NEXT:    .p2align 5
-; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_3: # Parent Loop BB0_5 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lfd 0, 0(26)
 ; CHECK-NEXT:    lfd 1, 0(23)
@@ -101,8 +88,21 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
 ; CHECK-NEXT:    xsadddp 0, 0, 1
 ; CHECK-NEXT:    stfd 0, 0(26)
 ; CHECK-NEXT:    add 26, 26, 11
-; CHECK-NEXT:    blt 0, .LBB0_5
-; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:    blt 0, .LBB0_3
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    add 5, 5, 9
+; CHECK-NEXT:    add 12, 12, 0
+; CHECK-NEXT:    add 30, 30, 0
+; CHECK-NEXT:    add 28, 28, 0
+; CHECK-NEXT:    add 8, 8, 0
+; CHECK-NEXT:    cmpd 5, 7
+; CHECK-NEXT:    bge 0, .LBB0_6
+; CHECK-NEXT:  .LBB0_5: # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_3 Depth 2
+; CHECK-NEXT:    sub 27, 5, 10
+; CHECK-NEXT:    cmpd 6, 27
+; CHECK-NEXT:    blt 0, .LBB0_2
+; CHECK-NEXT:    b .LBB0_4
 ; CHECK-NEXT:  .LBB0_6:
 ; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
index eeadb73b9db2cff..e97e17fc64782f4 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crgt.ll
@@ -155,11 +155,11 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    b .LBB0_25
 ; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB0_26: # %bb62
+; CHECK-NEXT:  .LBB0_26: # %bb56
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    b .LBB0_26
 ; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB0_27: # %bb56
+; CHECK-NEXT:  .LBB0_27: # %bb62
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    b .LBB0_27
 ; CHECK-NEXT:    .p2align 4
@@ -348,11 +348,11 @@ define dso_local fastcc void @P10_Spill_CR_GT() unnamed_addr {
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    b .LBB0_25
 ; CHECK-BE-NEXT:    .p2align 4
-; CHECK-BE-NEXT:  .LBB0_26: # %bb62
+; CHECK-BE-NEXT:  .LBB0_26: # %bb56
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    b .LBB0_26
 ; CHECK-BE-NEXT:    .p2align 4
-; CHECK-BE-NEXT:  .LBB0_27: # %bb56
+; CHECK-BE-NEXT:  .LBB0_27: # %bb62
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    b .LBB0_27
 ; CHECK-BE-NEXT:    .p2align 4
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
index 32f3342243904e6..4b032781c3764cf 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll
@@ -59,10 +59,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    plwz r3, call_1@PCREL(0), 1
 ; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    bne- cr0, .LBB0_10
+; CHECK-NEXT:    bne- cr0, .LBB0_9
 ; CHECK-NEXT:  # %bb.5: # %bb30
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
+; CHECK-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-NEXT:  # %bb.6: # %bb32
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    rlwinm r30, r30, 0, 24, 22
@@ -72,10 +72,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-NEXT:  # %bb.7: # %bb37
 ; CHECK-NEXT:  .LBB0_8: # %bb22
-; CHECK-NEXT:  .LBB0_9: # %bb35
-; CHECK-NEXT:  .LBB0_10: # %bb27
+; CHECK-NEXT:  .LBB0_9: # %bb27
 ; CHECK-NEXT:    bc 4, 4*cr3+lt, .LBB0_12
-; CHECK-NEXT:  # %bb.11: # %bb28
+; CHECK-NEXT:  # %bb.10: # %bb28
+; CHECK-NEXT:  .LBB0_11: # %bb35
 ; CHECK-NEXT:  .LBB0_12: # %bb29
 ; CHECK-NEXT:  .LBB0_13: # %bb3
 ; CHECK-NEXT:  .LBB0_14: # %bb2
@@ -120,10 +120,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    lwz r3, call_1@toc@l(r30)
 ; CHECK-BE-NEXT:    cmplwi r3, 0
-; CHECK-BE-NEXT:    bne- cr0, .LBB0_10
+; CHECK-BE-NEXT:    bne- cr0, .LBB0_9
 ; CHECK-BE-NEXT:  # %bb.5: # %bb30
 ; CHECK-BE-NEXT:    #
-; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_9
+; CHECK-BE-NEXT:    bc 12, 4*cr3+eq, .LBB0_11
 ; CHECK-BE-NEXT:  # %bb.6: # %bb32
 ; CHECK-BE-NEXT:    #
 ; CHECK-BE-NEXT:    rlwinm r29, r29, 0, 24, 22
@@ -134,10 +134,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr {
 ; CHECK-BE-NEXT:    beq+ cr2, .LBB0_3
 ; CHECK-BE-NEXT:  # %bb.7: # %bb37
 ; CHECK-BE-NEXT:  .LBB0_8: # %bb22
-; CHECK-BE-NEXT:  .LBB0_9: # %bb35
-; CHECK-BE-NEXT:  .LBB0_10: # %bb27
+; CHECK-BE-NEXT:  .LBB0_9: # %bb27
 ; CHECK-BE-NEXT:    bc 4, 4*cr3+lt, .LBB0_12
-; CHECK-BE-NEXT:  # %bb.11: # %bb28
+; CHECK-BE-NEXT:  # %bb.10: # %bb28
+; CHECK-BE-NEXT:  .LBB0_11: # %bb35
 ; CHECK-BE-NEXT:  .LBB0_12: # %bb29
 ; CHECK-BE-NEXT:  .LBB0_13: # %bb3
 ; CHECK-BE-NEXT:  .LBB0_14: # %bb2
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index 4ca2dc5dbe04da4..fd049280c9f39f6 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -82,7 +82,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:    lwz r28, 0(r3)
 ; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_5
 ; CHECK-NEXT:  # %bb.4: # %bb37
-; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_14
+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_12
 ; CHECK-NEXT:  .LBB0_5: # %bb42
 ; CHECK-NEXT:    paddi r3, 0, global_1@PCREL, 1
 ; CHECK-NEXT:    li r4, 0
@@ -95,10 +95,10 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:    lxsihzx v2, 0, r3
 ; CHECK-NEXT:    vextsh2d v2, v2
 ; CHECK-NEXT:    xscvsxdsp f0, v2
-; CHECK-NEXT:    bc 12, 4*cr2+lt, .LBB0_12
+; CHECK-NEXT:    bc 12, 4*cr2+lt, .LBB0_13
 ; CHECK-NEXT:  # %bb.6: # %bb42
 ; CHECK-NEXT:    xxspltidp vs1, 1069547520
-; CHECK-NEXT:    b .LBB0_13
+; CHECK-NEXT:    b .LBB0_14
 ; CHECK-NEXT:  .LBB0_7: # %bb19
 ; CHECK-NEXT:    setnbc r3, 4*cr2+un
 ; CHECK-NEXT:    paddi r4, 0, global_4@PCREL, 1
@@ -134,15 +134,15 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:    rlwimi r3, r4, 21, 11, 11
 ; CHECK-NEXT:    mtocrf 32, r3
 ; CHECK-NEXT:    b .LBB0_16
-; CHECK-NEXT:  .LBB0_12:
+; CHECK-NEXT:  .LBB0_12: # %bb41
+; CHECK-NEXT:    # implicit-def: $r3
+; CHECK-NEXT:    b .LBB0_15
+; CHECK-NEXT:  .LBB0_13:
 ; CHECK-NEXT:    xxspltidp vs1, 1071644672
-; CHECK-NEXT:  .LBB0_13: # %bb42
+; CHECK-NEXT:  .LBB0_14: # %bb42
 ; CHECK-NEXT:    xsmulsp f0, f1, f0
 ; CHECK-NEXT:    xscvdpsxws f0, f0
 ; CHECK-NEXT:    mffprwz r3, f0
-; CHECK-NEXT:    b .LBB0_15
-; CHECK-NEXT:  .LBB0_14: # %bb41
-; CHECK-NEXT:    # implicit-def: $r3
 ; CHECK-NEXT:  .LBB0_15: # %bb50
 ; CHECK-NEXT:    li r4, 0
 ; CHECK-NEXT:    xxspltidp vs3, -1082130432
@@ -232,7 +232,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:    addis r3, r2, global_1@toc@ha
 ; CHECK-BE-NEXT:    bc 12, 4*cr5+lt, .LBB0_5
 ; CHECK-BE-NEXT:  # %bb.4: # %bb37
-; CHECK-BE-NEXT:    bc 4, 4*cr5+lt, .LBB0_14
+; CHECK-BE-NEXT:    bc 4, 4*cr5+lt, .LBB0_12
 ; CHECK-BE-NEXT:  .LBB0_5: # %bb42
 ; CHECK-BE-NEXT:    addi r3, r3, global_1@toc@l
 ; CHECK-BE-NEXT:    li r4, 0
@@ -247,10 +247,10 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:    lxsihzx v2, 0, r3
 ; CHECK-BE-NEXT:    vextsh2d v2, v2
 ; CHECK-BE-NEXT:    xscvsxdsp f0, v2
-; CHECK-BE-NEXT:    bc 12, 4*cr2+lt, .LBB0_12
+; CHECK-BE-NEXT:    bc 12, 4*cr2+lt, .LBB0_13
 ; CHECK-BE-NEXT:  # %bb.6: # %bb42
 ; CHECK-BE-NEXT:    xxspltidp vs1, 1069547520
-; CHECK-BE-NEXT:    b .LBB0_13
+; CHECK-BE-NEXT:    b .LBB0_14
 ; CHECK-BE-NEXT:  .LBB0_7: # %bb19
 ; CHECK-BE-NEXT:    setnbc r3, 4*cr2+un
 ; CHECK-BE-NEXT:    addis r4, r2, global_4@toc@ha
@@ -292,15 +292,15 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:    rlwimi r3, r4, 21, 11, 11
 ; CHECK-BE-NEXT:    mtocrf 32, r3
 ; CHECK-BE-NEXT:    b .LBB0_16
-; CHECK-BE-NEXT:  .LBB0_12:
+; CHECK-BE-NEXT:  .LBB0_12: # %bb41
+; CHECK-BE-NEXT:    # implicit-def: $r3
+; CHECK-BE-NEXT:    b .LBB0_15
+; CHECK-BE-NEXT:  .LBB0_13:
 ; CHECK-BE-NEXT:    xxspltidp vs1, 1071644672
-; CHECK-BE-NEXT:  .LBB0_13: # %bb42
+; CHECK-BE-NEXT:  .LBB0_14: # %bb42
 ; CHECK-BE-NEXT:    xsmulsp f0, f1, f0
 ; CHECK-BE-NEXT:    xscvdpsxws f0, f0
 ; CHECK-BE-NEXT:    mffprwz r3, f0
-; CHECK-BE-NEXT:    b .LBB0_15
-; CHECK-BE-NEXT:  .LBB0_14: # %bb41
-; CHECK-BE-NEXT:    # implicit-def: $r3
 ; CHECK-BE-NEXT:  .LBB0_15: # %bb50
 ; CHECK-BE-NEXT:    li r4, 0
 ; CHECK-BE-NEXT:    xxspltidp vs3, -1082130432
diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll
index 379bd6c070c777f..d7c70ab7f066c80 100644
--- a/llvm/test/CodeGen/PowerPC/pr43527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43527.ll
@@ -5,9 +5,9 @@
 define dso_local void @test(i64 %arg, i64 %arg1) {
 ; CHECK-LABEL: test:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_5
+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_6
 ; CHECK-NEXT:  # %bb.1: # %bb3
-; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_6
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_5
 ; CHECK-NEXT:  # %bb.2: # %bb4
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
@@ -37,8 +37,8 @@ define dso_local void @test(i64 %arg, i64 %arg1) {
 ; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB0_5: # %bb2
-; CHECK-NEXT:  .LBB0_6: # %bb14
+; CHECK-NEXT:  .LBB0_5: # %bb14
+; CHECK-NEXT:  .LBB0_6: # %bb2
 bb:
   br i1 undef, label %bb3, label %bb2
 
diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll
index 0f8014df8adca93..6b3d578f6b33829 100644
--- a/llvm/test/CodeGen/PowerPC/pr45448.ll
+++ b/llvm/test/CodeGen/PowerPC/pr45448.ll
@@ -7,17 +7,17 @@ define hidden void @julia_tryparse_internal_45896() #0 {
 ; CHECK:       # %bb.0: # %top
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    cmpldi r3, 0
-; CHECK-NEXT:    beq cr0, .LBB0_3
+; CHECK-NEXT:    beq cr0, .LBB0_6
 ; CHECK-NEXT:  # %bb.1: # %top
 ; CHECK-NEXT:    cmpldi r3, 10
-; CHECK-NEXT:    beq cr0, .LBB0_4
+; CHECK-NEXT:    beq cr0, .LBB0_3
 ; CHECK-NEXT:  # %bb.2: # %top
-; CHECK-NEXT:  .LBB0_3: # %fail194
-; CHECK-NEXT:  .LBB0_4: # %L294
-; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_6
-; CHECK-NEXT:  # %bb.5: # %L294
+; CHECK-NEXT:  .LBB0_3: # %L294
+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %L294
 ; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_7
-; CHECK-NEXT:  .LBB0_6: # %L1057.preheader
+; CHECK-NEXT:  .LBB0_5: # %L1057.preheader
+; CHECK-NEXT:  .LBB0_6: # %fail194
 ; CHECK-NEXT:  .LBB0_7: # %L670
 ; CHECK-NEXT:    li r5, -3
 ; CHECK-NEXT:    cmpdi r3, 0
diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index d1129b1825aeef3..409cd7996ee5422 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -14,15 +14,17 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    li r12, -6144
 ; CHECK-LE-NEXT:    add r0, r12, r0
 ; CHECK-LE-NEXT:    sub r12, r0, r1
-; CHECK-LE-NEXT:    cmpdi r12, -4096
-; CHECK-LE-NEXT:    bge cr0, .LBB0_2
+; CHECK-LE-NEXT:    b .LBB0_2
+; CHECK-LE-NEXT:    .p2align 4
 ; CHECK-LE-NEXT:  .LBB0_1: # %entry
 ; CHECK-LE-NEXT:    #
 ; CHECK-LE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:  .LBB0_2: # %entry
+; CHECK-LE-NEXT:    #
 ; CHECK-LE-NEXT:    cmpdi r12, -4096
 ; CHECK-LE-NEXT:    blt cr0, .LBB0_1
-; CHECK-LE-NEXT:  .LBB0_2: # %entry
+; CHECK-LE-NEXT:  # %bb.3: # %entry
 ; CHECK-LE-NEXT:    stdux r30, r1, r12
 ; CHECK-LE-NEXT:    mr r0, r30
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
@@ -46,13 +48,13 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    sub r5, r5, r6
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    beq cr0, .LBB0_4
-; CHECK-LE-NEXT:  .LBB0_3: # %entry
+; CHECK-LE-NEXT:    beq cr0, .LBB0_5
+; CHECK-LE-NEXT:  .LBB0_4: # %entry
 ; CHECK-LE-NEXT:    #
 ; CHECK-LE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    bne cr0, .LBB0_3
-; CHECK-LE-NEXT:  .LBB0_4: # %entry
+; CHECK-LE-NEXT:    bne cr0, .LBB0_4
+; CHECK-LE-NEXT:  .LBB0_5: # %entry
 ; CHECK-LE-NEXT:    addi r3, r1, 2048
 ; CHECK-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-LE-NEXT:    mr r1, r30
diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll
index 002dd8f0d167a90..1b76555b67141fd 100644
--- a/llvm/test/CodeGen/PowerPC/pr48519.ll
+++ b/llvm/test/CodeGen/PowerPC/pr48519.ll
@@ -156,28 +156,28 @@ define void @func_48786() #0 {
 ; CHECK-NEXT:    ld r3, 0(r3)
 ; CHECK-NEXT:    cmpdi r3, 0
 ; CHECK-NEXT:    crnot 4*cr2+lt, eq
-; CHECK-NEXT:    b .LBB2_2
+; CHECK-NEXT:    b .LBB2_4
 ; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  .LBB2_1: # %bb10
-; CHECK-NEXT:    #
-; CHECK-NEXT:    addi r30, r30, -1
-; CHECK-NEXT:    cmpldi r30, 0
-; CHECK-NEXT:    bc 4, gt, .LBB2_5
-; CHECK-NEXT:  .LBB2_2: # %bb2
-; CHECK-NEXT:    #
-; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB2_1
-; CHECK-NEXT:  # %bb.3: # %bb4
+; CHECK-NEXT:  .LBB2_1: # %bb4
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lhz r3, 0(r3)
 ; CHECK-NEXT:    bl __gnu_h2f_ieee
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    bc 4, 4*cr2+lt, .LBB2_6
-; CHECK-NEXT:  # %bb.4: # %bb8
+; CHECK-NEXT:  # %bb.2: # %bb8
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    sth r3, 0(0)
-; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_3: # %bb10
+; CHECK-NEXT:    #
+; CHECK-NEXT:    addi r30, r30, -1
+; CHECK-NEXT:    cmpldi r30, 0
+; CHECK-NEXT:    bc 4, gt, .LBB2_5
+; CHECK-NEXT:  .LBB2_4: # %bb2
+; CHECK-NEXT:    #
+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB2_1
+; CHECK-NEXT:    b .LBB2_3
 ; CHECK-NEXT:  .LBB2_5: # %bb14
 ; CHECK-NEXT:    ld r30, 32(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 48
@@ -195,24 +195,24 @@ define void @func_48786() #0 {
 ; CHECK-P9-NEXT:    mtctr r3
 ; CHECK-P9-NEXT:    li r3, 0
 ; CHECK-P9-NEXT:    crnot 4*cr5+lt, eq
-; CHECK-P9-NEXT:    b .LBB2_2
+; CHECK-P9-NEXT:    b .LBB2_4
 ; CHECK-P9-NEXT:    .p2align 5
-; CHECK-P9-NEXT:  .LBB2_1: # %bb10
-; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    bdzlr
-; CHECK-P9-NEXT:  .LBB2_2: # %bb2
-; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    bc 12, 4*cr5+lt, .LBB2_1
-; CHECK-P9-NEXT:  # %bb.3: # %bb4
+; CHECK-P9-NEXT:  .LBB2_1: # %bb4
 ; CHECK-P9-NEXT:    #
 ; CHECK-P9-NEXT:    lxsihzx f0, 0, r3
 ; CHECK-P9-NEXT:    xscvhpdp f0, f0
 ; CHECK-P9-NEXT:    bc 4, 4*cr5+lt, .LBB2_5
-; CHECK-P9-NEXT:  # %bb.4: # %bb8
+; CHECK-P9-NEXT:  # %bb.2: # %bb8
 ; CHECK-P9-NEXT:    #
 ; CHECK-P9-NEXT:    xscvdphp f0, f0
 ; CHECK-P9-NEXT:    stxsihx f0, 0, r3
-; CHECK-P9-NEXT:    b .LBB2_1
+; CHECK-P9-NEXT:  .LBB2_3: # %bb10
+; CHECK-P9-NEXT:    #
+; CHECK-P9-NEXT:    bdzlr
+; CHECK-P9-NEXT:  .LBB2_4: # %bb2
+; CHECK-P9-NEXT:    #
+; CHECK-P9-NEXT:    bc 4, 4*cr5+lt, .LBB2_1
+; CHECK-P9-NEXT:    b .LBB2_3
 ; CHECK-P9-NEXT:  .LBB2_5: # %bb15
 bb:
   %i = load i64, ptr addrspace(11) undef, align 8
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index 4b19cb30aba2a90..009757c8fb90e04 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -606,14 +606,15 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    li r12, -10240
 ; CHECK-LE-NEXT:    add r0, r12, r0
 ; CHECK-LE-NEXT:    sub r12, r0, r1
-; CHECK-LE-NEXT:    cmpdi r12, -4096
-; CHECK-LE-NEXT:    bge cr0, .LBB9_2
+; CHECK-LE-NEXT:    b .LBB9_2
+; CHECK-LE-NEXT:    .p2align 4
 ; CHECK-LE-NEXT:  .LBB9_1:
 ; CHECK-LE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:  .LBB9_2:
 ; CHECK-LE-NEXT:    cmpdi r12, -4096
 ; CHECK-LE-NEXT:    blt cr0, .LBB9_1
-; CHECK-LE-NEXT:  .LBB9_2:
+; CHECK-LE-NEXT:  # %bb.3:
 ; CHECK-LE-NEXT:    stdux r30, r1, r12
 ; CHECK-LE-NEXT:    mr r0, r30
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
@@ -637,14 +638,14 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    li r12, -10240
 ; CHECK-BE-NEXT:    add r0, r12, r0
 ; CHECK-BE-NEXT:    sub r12, r0, r1
-; CHECK-BE-NEXT:    cmpdi r12, -4096
-; CHECK-BE-NEXT:    bge cr0, .LBB9_2
+; CHECK-BE-NEXT:    b .LBB9_2
 ; CHECK-BE-NEXT:  .LBB9_1:
 ; CHECK-BE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:  .LBB9_2:
 ; CHECK-BE-NEXT:    cmpdi r12, -4096
 ; CHECK-BE-NEXT:    blt cr0, .LBB9_1
-; CHECK-BE-NEXT:  .LBB9_2:
+; CHECK-BE-NEXT:  # %bb.3:
 ; CHECK-BE-NEXT:    stdux r30, r1, r12
 ; CHECK-BE-NEXT:    mr r0, r30
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
@@ -667,14 +668,14 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 {
 ; CHECK-32-NEXT:    add r0, r12, r0
 ; CHECK-32-NEXT:    sub r12, r0, r1
 ; CHECK-32-NEXT:    mr r0, r1
-; CHECK-32-NEXT:    cmpwi r12, -4096
-; CHECK-32-NEXT:    bge cr0, .LBB9_2
+; CHECK-32-NEXT:    b .LBB9_2
 ; CHECK-32-NEXT:  .LBB9_1:
 ; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:  .LBB9_2:
 ; CHECK-32-NEXT:    cmpwi r12, -4096
 ; CHECK-32-NEXT:    blt cr0, .LBB9_1
-; CHECK-32-NEXT:  .LBB9_2:
+; CHECK-32-NEXT:  # %bb.3:
 ; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
@@ -713,14 +714,15 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ; CHECK-LE-NEXT:    li r12, -5120
 ; CHECK-LE-NEXT:    add r0, r12, r0
 ; CHECK-LE-NEXT:    sub r12, r0, r1
-; CHECK-LE-NEXT:    cmpdi r12, -4096
-; CHECK-LE-NEXT:    bge cr0, .LBB10_2
+; CHECK-LE-NEXT:    b .LBB10_2
+; CHECK-LE-NEXT:    .p2align 4
 ; CHECK-LE-NEXT:  .LBB10_1:
 ; CHECK-LE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:  .LBB10_2:
 ; CHECK-LE-NEXT:    cmpdi r12, -4096
 ; CHECK-LE-NEXT:    blt cr0, .LBB10_1
-; CHECK-LE-NEXT:  .LBB10_2:
+; CHECK-LE-NEXT:  # %bb.3:
 ; CHECK-LE-NEXT:    stdux r30, r1, r12
 ; CHECK-LE-NEXT:    mr r0, r30
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
@@ -744,14 +746,14 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ; CHECK-BE-NEXT:    li r12, -5120
 ; CHECK-BE-NEXT:    add r0, r12, r0
 ; CHECK-BE-NEXT:    sub r12, r0, r1
-; CHECK-BE-NEXT:    cmpdi r12, -4096
-; CHECK-BE-NEXT:    bge cr0, .LBB10_2
+; CHECK-BE-NEXT:    b .LBB10_2
 ; CHECK-BE-NEXT:  .LBB10_1:
 ; CHECK-BE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:  .LBB10_2:
 ; CHECK-BE-NEXT:    cmpdi r12, -4096
 ; CHECK-BE-NEXT:    blt cr0, .LBB10_1
-; CHECK-BE-NEXT:  .LBB10_2:
+; CHECK-BE-NEXT:  # %bb.3:
 ; CHECK-BE-NEXT:    stdux r30, r1, r12
 ; CHECK-BE-NEXT:    mr r0, r30
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
@@ -774,14 +776,14 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 {
 ; CHECK-32-NEXT:    add r0, r12, r0
 ; CHECK-32-NEXT:    sub r12, r0, r1
 ; CHECK-32-NEXT:    mr r0, r1
-; CHECK-32-NEXT:    cmpwi r12, -4096
-; CHECK-32-NEXT:    bge cr0, .LBB10_2
+; CHECK-32-NEXT:    b .LBB10_2
 ; CHECK-32-NEXT:  .LBB10_1:
 ; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:  .LBB10_2:
 ; CHECK-32-NEXT:    cmpwi r12, -4096
 ; CHECK-32-NEXT:    blt cr0, .LBB10_1
-; CHECK-32-NEXT:  .LBB10_2:
+; CHECK-32-NEXT:  # %bb.3:
 ; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
@@ -821,14 +823,15 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    ori r12, r12, 32768
 ; CHECK-LE-NEXT:    add r0, r12, r0
 ; CHECK-LE-NEXT:    sub r12, r0, r1
-; CHECK-LE-NEXT:    cmpdi r12, -4096
-; CHECK-LE-NEXT:    bge cr0, .LBB11_2
+; CHECK-LE-NEXT:    b .LBB11_2
+; CHECK-LE-NEXT:    .p2align 4
 ; CHECK-LE-NEXT:  .LBB11_1:
 ; CHECK-LE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-LE-NEXT:    addi r12, r12, 4096
+; CHECK-LE-NEXT:  .LBB11_2:
 ; CHECK-LE-NEXT:    cmpdi r12, -4096
 ; CHECK-LE-NEXT:    blt cr0, .LBB11_1
-; CHECK-LE-NEXT:  .LBB11_2:
+; CHECK-LE-NEXT:  # %bb.3:
 ; CHECK-LE-NEXT:    stdux r30, r1, r12
 ; CHECK-LE-NEXT:    mr r0, r30
 ; CHECK-LE-NEXT:    .cfi_def_cfa_register r0
@@ -858,12 +861,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-LE-NEXT:    sub r5, r5, r6
 ; CHECK-LE-NEXT:    stdux r3, r1, r5
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    beq cr0, .LBB11_4
-; CHECK-LE-NEXT:  .LBB11_3:
+; CHECK-LE-NEXT:    beq cr0, .LBB11_5
+; CHECK-LE-NEXT:  .LBB11_4:
 ; CHECK-LE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-LE-NEXT:    cmpd r1, r4
-; CHECK-LE-NEXT:    bne cr0, .LBB11_3
-; CHECK-LE-NEXT:  .LBB11_4:
+; CHECK-LE-NEXT:    bne cr0, .LBB11_4
+; CHECK-LE-NEXT:  .LBB11_5:
 ; CHECK-LE-NEXT:    addi r3, r1, -32768
 ; CHECK-LE-NEXT:    lbz r3, 0(r3)
 ; CHECK-LE-NEXT:    mr r1, r30
@@ -882,14 +885,14 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    ori r12, r12, 32768
 ; CHECK-BE-NEXT:    add r0, r12, r0
 ; CHECK-BE-NEXT:    sub r12, r0, r1
-; CHECK-BE-NEXT:    cmpdi r12, -4096
-; CHECK-BE-NEXT:    bge cr0, .LBB11_2
+; CHECK-BE-NEXT:    b .LBB11_2
 ; CHECK-BE-NEXT:  .LBB11_1:
 ; CHECK-BE-NEXT:    stdu r30, -4096(r1)
 ; CHECK-BE-NEXT:    addi r12, r12, 4096
+; CHECK-BE-NEXT:  .LBB11_2:
 ; CHECK-BE-NEXT:    cmpdi r12, -4096
 ; CHECK-BE-NEXT:    blt cr0, .LBB11_1
-; CHECK-BE-NEXT:  .LBB11_2:
+; CHECK-BE-NEXT:  # %bb.3:
 ; CHECK-BE-NEXT:    stdux r30, r1, r12
 ; CHECK-BE-NEXT:    mr r0, r30
 ; CHECK-BE-NEXT:    .cfi_def_cfa_register r0
@@ -919,12 +922,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-BE-NEXT:    add r4, r1, r7
 ; CHECK-BE-NEXT:    stdux r3, r1, r5
 ; CHECK-BE-NEXT:    cmpd r1, r4
-; CHECK-BE-NEXT:    beq cr0, .LBB11_4
-; CHECK-BE-NEXT:  .LBB11_3:
+; CHECK-BE-NEXT:    beq cr0, .LBB11_5
+; CHECK-BE-NEXT:  .LBB11_4:
 ; CHECK-BE-NEXT:    stdu r3, -4096(r1)
 ; CHECK-BE-NEXT:    cmpd r1, r4
-; CHECK-BE-NEXT:    bne cr0, .LBB11_3
-; CHECK-BE-NEXT:  .LBB11_4:
+; CHECK-BE-NEXT:    bne cr0, .LBB11_4
+; CHECK-BE-NEXT:  .LBB11_5:
 ; CHECK-BE-NEXT:    addi r3, r1, -32768
 ; CHECK-BE-NEXT:    lbz r3, 0(r3)
 ; CHECK-BE-NEXT:    mr r1, r30
@@ -941,14 +944,14 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-32-NEXT:    add r0, r12, r0
 ; CHECK-32-NEXT:    sub r12, r0, r1
 ; CHECK-32-NEXT:    mr r0, r1
-; CHECK-32-NEXT:    cmpwi r12, -4096
-; CHECK-32-NEXT:    bge cr0, .LBB11_2
+; CHECK-32-NEXT:    b .LBB11_2
 ; CHECK-32-NEXT:  .LBB11_1:
 ; CHECK-32-NEXT:    stwu r0, -4096(r1)
 ; CHECK-32-NEXT:    addi r12, r12, 4096
+; CHECK-32-NEXT:  .LBB11_2:
 ; CHECK-32-NEXT:    cmpwi r12, -4096
 ; CHECK-32-NEXT:    blt cr0, .LBB11_1
-; CHECK-32-NEXT:  .LBB11_2:
+; CHECK-32-NEXT:  # %bb.3:
 ; CHECK-32-NEXT:    stwux r0, r1, r12
 ; CHECK-32-NEXT:    .cfi_def_cfa_register r0
 ; CHECK-32-NEXT:    sub r0, r1, r0
@@ -982,12 +985,12 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
 ; CHECK-32-NEXT:    add r4, r1, r7
 ; CHECK-32-NEXT:    stwux r3, r1, r5
 ; CHECK-32-NEXT:    cmpw r1, r4
-; CHECK-32-NEXT:    beq cr0, .LBB11_4
-; CHECK-32-NEXT:  .LBB11_3:
+; CHECK-32-NEXT:    beq cr0, .LBB11_5
+; CHECK-32-NEXT:  .LBB11_4:
 ; CHECK-32-NEXT:    stwu r3, -4096(r1)
 ; CHECK-32-NEXT:    cmpw r1, r4
-; CHECK-32-NEXT:    bne cr0, .LBB11_3
-; CHECK-32-NEXT:  .LBB11_4:
+; CHECK-32-NEXT:    bne cr0, .LBB11_4
+; CHECK-32-NEXT:  .LBB11_5:
 ; CHECK-32-NEXT:    addi r3, r1, -32768
 ; CHECK-32-NEXT:    lbz r3, 0(r3)
 ; CHECK-32-NEXT:    lwz r31, 0(r1)
diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap-jump-table.ll b/llvm/test/CodeGen/RISCV/shrinkwrap-jump-table.ll
index 99780c5e0d444b6..75af75ce75ed9c0 100644
--- a/llvm/test/CodeGen/RISCV/shrinkwrap-jump-table.ll
+++ b/llvm/test/CodeGen/RISCV/shrinkwrap-jump-table.ll
@@ -14,7 +14,7 @@ define dso_local signext i32 @test_shrinkwrap_jump_table(ptr noundef %m) local_u
 ; CHECK-NEXT:    lw a1, 0(a0)
 ; CHECK-NEXT:    addi a1, a1, -1
 ; CHECK-NEXT:    li a2, 4
-; CHECK-NEXT:    bltu a2, a1, .LBB0_3
+; CHECK-NEXT:    bltu a2, a1, .LBB0_7
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    slli a1, a1, 2
 ; CHECK-NEXT:    lui a2, %hi(.LJTI0_0)
@@ -24,7 +24,15 @@ define dso_local signext i32 @test_shrinkwrap_jump_table(ptr noundef %m) local_u
 ; CHECK-NEXT:    jr a1
 ; CHECK-NEXT:  .LBB0_2: # %sw.bb
 ; CHECK-NEXT:    tail func1@plt
-; CHECK-NEXT:  .LBB0_3: # %sw.default
+; CHECK-NEXT:  .LBB0_3: # %sw.bb1
+; CHECK-NEXT:    tail func2@plt
+; CHECK-NEXT:  .LBB0_4: # %sw.bb3
+; CHECK-NEXT:    tail func3@plt
+; CHECK-NEXT:  .LBB0_5: # %sw.bb5
+; CHECK-NEXT:    tail func4@plt
+; CHECK-NEXT:  .LBB0_6: # %sw.bb7
+; CHECK-NEXT:    tail func5@plt
+; CHECK-NEXT:  .LBB0_7: # %sw.default
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -34,14 +42,6 @@ define dso_local signext i32 @test_shrinkwrap_jump_table(ptr noundef %m) local_u
 ; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB0_4: # %sw.bb1
-; CHECK-NEXT:    tail func2@plt
-; CHECK-NEXT:  .LBB0_5: # %sw.bb3
-; CHECK-NEXT:    tail func3@plt
-; CHECK-NEXT:  .LBB0_6: # %sw.bb5
-; CHECK-NEXT:    tail func4@plt
-; CHECK-NEXT:  .LBB0_7: # %sw.bb7
-; CHECK-NEXT:    tail func5@plt
 entry:
   %0 = load i32, ptr %m, align 4
   switch i32 %0, label %sw.default [
diff --git a/llvm/test/CodeGen/Thumb/pr42760.ll b/llvm/test/CodeGen/Thumb/pr42760.ll
index fc9a18bb33420f0..cba2de3324df57b 100644
--- a/llvm/test/CodeGen/Thumb/pr42760.ll
+++ b/llvm/test/CodeGen/Thumb/pr42760.ll
@@ -6,31 +6,34 @@ define hidden void @test() {
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    movs r0, #1
 ; CHECK-NEXT:    lsls r1, r0, #2
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_1: @ %bb2
-; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_1: @ %bb
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bne .LBB0_7
+; CHECK-NEXT:  .LBB0_2: @ %bb2
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    bne .LBB0_6
-; CHECK-NEXT:  .LBB0_2: @ %switch
+; CHECK-NEXT:    bne .LBB0_7
+; CHECK-NEXT:  .LBB0_3: @ %switch
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adr r2, .LJTI0_0
 ; CHECK-NEXT:    ldr r2, [r2, r1]
 ; CHECK-NEXT:    mov pc, r2
-; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  @ %bb.4:
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  .LJTI0_0:
+; CHECK-NEXT:    .long .LBB0_7+1
 ; CHECK-NEXT:    .long .LBB0_6+1
-; CHECK-NEXT:    .long .LBB0_4+1
-; CHECK-NEXT:    .long .LBB0_6+1
+; CHECK-NEXT:    .long .LBB0_7+1
 ; CHECK-NEXT:    .long .LBB0_5+1
-; CHECK-NEXT:  .LBB0_4: @ %switch
-; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:  .LBB0_5: @ %switch
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    b .LBB0_1
-; CHECK-NEXT:  .LBB0_5: @ %bb
-; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    beq .LBB0_1
-; CHECK-NEXT:  .LBB0_6: @ %dead
+; CHECK-NEXT:  .LBB0_6: @ %switch
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:  .LBB0_7: @ %dead
 entry:
   br label %switch
 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
index e0c045ba0440db3..101f8f6ef6eeb1d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll
@@ -12,25 +12,25 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    lsl.w r12, r3, #2
 ; CHECK-NEXT:    movs r7, #0
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_2: @ %for.body
+; CHECK-NEXT:    b .LBB0_3
+; CHECK-NEXT:  .LBB0_2: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vldrb.u8 q0, [r4], #16
+; CHECK-NEXT:    vstrb.8 q0, [r5], #16
+; CHECK-NEXT:    letp lr, .LBB0_2
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_3: @ %for.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_2 Depth 2
 ; CHECK-NEXT:    adds r4, r1, r7
 ; CHECK-NEXT:    adds r5, r0, r7
-; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_3
-; CHECK-NEXT:    b .LBB0_4
-; CHECK-NEXT:  .LBB0_3: @ %for.body
-; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_4
+; CHECK-NEXT:    b .LBB0_2
+; CHECK-NEXT:  .LBB0_4: @ %for.body
+; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    add r7, r12
 ; CHECK-NEXT:    subs r2, #1
 ; CHECK-NEXT:    beq .LBB0_5
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_2 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vldrb.u8 q0, [r4], #16
-; CHECK-NEXT:    vstrb.8 q0, [r5], #16
-; CHECK-NEXT:    letp lr, .LBB0_4
 ; CHECK-NEXT:    b .LBB0_3
 ; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
@@ -65,23 +65,23 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
 ; CHECK-NEXT:    push {r4, lr}
 ; CHECK-NEXT:    lsl.w r12, r2, #2
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_2: @ %for.body
-; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB1_4 Depth 2
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    wlstp.8 lr, r2, .LBB1_3
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_2: @ Parent Loop BB1_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q0, [r4], #16
+; CHECK-NEXT:    letp lr, .LBB1_2
 ; CHECK-NEXT:    b .LBB1_4
 ; CHECK-NEXT:  .LBB1_3: @ %for.body
-; CHECK-NEXT:    @ in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB1_2 Depth 2
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    wlstp.8 lr, r2, .LBB1_4
+; CHECK-NEXT:    b .LBB1_2
+; CHECK-NEXT:  .LBB1_4: @ %for.body
+; CHECK-NEXT:    @ in Loop: Header=BB1_3 Depth=1
 ; CHECK-NEXT:    add r0, r12
 ; CHECK-NEXT:    subs r1, #1
 ; CHECK-NEXT:    beq .LBB1_5
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_4: @ Parent Loop BB1_2 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q0, [r4], #16
-; CHECK-NEXT:    letp lr, .LBB1_4
 ; CHECK-NEXT:    b .LBB1_3
 ; CHECK-NEXT:  .LBB1_5:
 ; CHECK-NEXT:    pop.w {r4, lr}
diff --git a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
index 59e346588754a40..ed895be96cd8910 100644
--- a/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-indirect-branches.ll
@@ -7,30 +7,30 @@ define internal i32 @table_switch(i32 %x) {
 ; CHECK-NEXT:    bti
 ; CHECK-NEXT:    subs r1, r0, #1
 ; CHECK-NEXT:    cmp r1, #3
-; CHECK-NEXT:    bhi .LBB0_4
+; CHECK-NEXT:    bhi .LBB0_6
 ; CHECK-NEXT:  @ %bb.1: @ %entry
 ; CHECK-NEXT:  .LCPI0_0:
 ; CHECK-NEXT:    tbb [pc, r1]
 ; CHECK-NEXT:  @ %bb.2:
 ; CHECK-NEXT:  .LJTI0_0:
-; CHECK-NEXT:    .byte (.LBB0_5-(.LCPI0_0+4))/2
-; CHECK-NEXT:    .byte (.LBB0_3-(.LCPI0_0+4))/2
-; CHECK-NEXT:    .byte (.LBB0_6-(.LCPI0_0+4))/2
 ; CHECK-NEXT:    .byte (.LBB0_7-(.LCPI0_0+4))/2
+; CHECK-NEXT:    .byte (.LBB0_3-(.LCPI0_0+4))/2
+; CHECK-NEXT:    .byte (.LBB0_4-(.LCPI0_0+4))/2
+; CHECK-NEXT:    .byte (.LBB0_5-(.LCPI0_0+4))/2
 ; CHECK-NEXT:    .p2align 1
 ; CHECK-NEXT:  .LBB0_3: @ %bb2
 ; CHECK-NEXT:    movs r0, #2
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:  .LBB0_4: @ %sw.epilog
-; CHECK-NEXT:    movs r0, #0
-; CHECK-NEXT:  .LBB0_5: @ %return
-; CHECK-NEXT:    bx lr
-; CHECK-NEXT:  .LBB0_6: @ %bb3
+; CHECK-NEXT:  .LBB0_4: @ %bb3
 ; CHECK-NEXT:    movs r0, #3
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:  .LBB0_7: @ %bb4
+; CHECK-NEXT:  .LBB0_5: @ %bb4
 ; CHECK-NEXT:    movs r0, #4
 ; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_6: @ %sw.epilog
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:  .LBB0_7: @ %return
+; CHECK-NEXT:    bx lr
 entry:
   switch i32 %x, label %sw.epilog [
     i32 1, label %bb1
@@ -93,23 +93,6 @@ declare void @consume_exception(ptr)
 declare i32 @__gxx_personality_v0(...)
 
 define internal i32 @exception_handling(i32 %0) personality ptr @__gxx_personality_v0 {
-; CHECK-LABEL: exception_handling:
-; CHECK:       @ %bb.0:
-; CHECK-NEXT:    bti
-; CHECK-NEXT:    .save  {r7, lr}
-; CHECK-NEXT:    push   {r7, lr}
-; CHECK-NEXT:  .Ltmp0:
-; CHECK-NEXT:    bl     may_throw
-; CHECK-NEXT:  .Ltmp1:
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    movs   r0, #0
-; CHECK-NEXT:    pop    {r7, pc}
-; CHECK-NEXT:  .LBB2_2:
-; CHECK-NEXT:  .Ltmp2:
-; CHECK-NEXT:    bti
-; CHECK-NEXT:    bl     consume_exception
-; CHECK-NEXT:    movs   r0, #1
-; CHECK-NEXT:    pop    {r7, pc}
 entry:
   invoke void @may_throw()
           to label %return unwind label %lpad
diff --git a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll
index 98fe30039259f03..1aeecdf1e08f36e 100644
--- a/llvm/test/CodeGen/Thumb2/constant-hoisting.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-hoisting.ll
@@ -7,27 +7,27 @@ define i32 @test_values(i32 %a, i32 %b) minsize optsize {
 ; CHECK-V6M:         mov r2, r0
 ; CHECK-V6M-NEXT:    ldr r0, .LCPI0_0
 ; CHECK-V6M-NEXT:    cmp r2, #50
-; CHECK-V6M-NEXT:    beq .LBB0_5
-; CHECK-V6M-NEXT:    cmp r2, #1
 ; CHECK-V6M-NEXT:    beq .LBB0_7
+; CHECK-V6M-NEXT:    cmp r2, #1
+; CHECK-V6M-NEXT:    beq .LBB0_5
 ; CHECK-V6M-NEXT:    cmp r2, #30
-; CHECK-V6M-NEXT:    beq .LBB0_8
+; CHECK-V6M-NEXT:    beq .LBB0_6
 ; CHECK-V6M-NEXT:    cmp r2, #0
-; CHECK-V6M-NEXT:    bne .LBB0_6
+; CHECK-V6M-NEXT:    bne .LBB0_8
 ; CHECK-V6M-NEXT:    adds r0, r1, r0
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_5:
 ; CHECK-V6M-NEXT:    adds r0, r0, r1
-; CHECK-V6M-NEXT:    adds r0, r0, #4
+; CHECK-V6M-NEXT:    adds r0, r0, #1
+; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_6:
+; CHECK-V6M-NEXT:    adds r0, r0, r1
+; CHECK-V6M-NEXT:    adds r0, r0, #2
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:  .LBB0_7:
 ; CHECK-V6M-NEXT:    adds r0, r0, r1
-; CHECK-V6M-NEXT:    adds r0, r0, #1
-; CHECK-V6M-NEXT:    bx lr
+; CHECK-V6M-NEXT:    adds r0, r0, #4
 ; CHECK-V6M-NEXT:  .LBB0_8:
-; CHECK-V6M-NEXT:    adds r0, r0, r1
-; CHECK-V6M-NEXT:    adds r0, r0, #2
 ; CHECK-V6M-NEXT:    bx lr
 ; CHECK-V6M-NEXT:    .p2align 2
 ; CHECK-V6M-NEXT:  .LCPI0_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index 39bf97d880ea3f4..f0937059a8e7a2c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -428,9 +428,9 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:  .LBB1_4: @ %for.cond2.preheader
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
 ; CHECK-NEXT:    @ Child Loop BB1_17 Depth 2
-; CHECK-NEXT:    @ Child Loop BB1_8 Depth 2
-; CHECK-NEXT:    @ Child Loop BB1_10 Depth 3
-; CHECK-NEXT:    @ Child Loop BB1_12 Depth 3
+; CHECK-NEXT:    @ Child Loop BB1_11 Depth 2
+; CHECK-NEXT:    @ Child Loop BB1_8 Depth 3
+; CHECK-NEXT:    @ Child Loop BB1_13 Depth 3
 ; CHECK-NEXT:    cmp.w r11, #2
 ; CHECK-NEXT:    bgt .LBB1_3
 ; CHECK-NEXT:  @ %bb.5: @ %for.body6.lr.ph
@@ -451,23 +451,9 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    mov lr, r7
 ; CHECK-NEXT:    mov r7, r4
 ; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    b .LBB1_8
-; CHECK-NEXT:  .LBB1_7: @ %for.cond.cleanup17.us
-; CHECK-NEXT:    @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT:    add.w r11, r3, #7
-; CHECK-NEXT:    cmn.w r3, #4
-; CHECK-NEXT:    mov.w r10, #0
-; CHECK-NEXT:    mov r3, r11
-; CHECK-NEXT:    bge .LBB1_3
-; CHECK-NEXT:  .LBB1_8: @ %for.body6.us
-; CHECK-NEXT:    @ Parent Loop BB1_4 Depth=1
-; CHECK-NEXT:    @ => This Loop Header: Depth=2
-; CHECK-NEXT:    @ Child Loop BB1_10 Depth 3
-; CHECK-NEXT:    @ Child Loop BB1_12 Depth 3
-; CHECK-NEXT:    movs r1, #0
-; CHECK-NEXT:    cbz r2, .LBB1_11
-; CHECK-NEXT:  @ %bb.9: @ %for.body13.us51.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB1_8 Depth=2
+; CHECK-NEXT:    b .LBB1_11
+; CHECK-NEXT:  .LBB1_7: @ %for.body13.us51.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    movw r4, :lower16:a
 ; CHECK-NEXT:    vmov q1, q4
 ; CHECK-NEXT:    movt r4, :upper16:a
@@ -476,9 +462,9 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    movt r4, :upper16:b
 ; CHECK-NEXT:    str r1, [r4]
 ; CHECK-NEXT:    mov r4, r7
-; CHECK-NEXT:  .LBB1_10: @ %vector.body111
+; CHECK-NEXT:  .LBB1_8: @ %vector.body111
 ; CHECK-NEXT:    @ Parent Loop BB1_4 Depth=1
-; CHECK-NEXT:    @ Parent Loop BB1_8 Depth=2
+; CHECK-NEXT:    @ Parent Loop BB1_11 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vqadd.u32 q2, q5, r1
 ; CHECK-NEXT:    subs r4, #4
@@ -489,15 +475,32 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    vadd.i32 q1, q1, r9
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrwt.32 q0, [q2]
-; CHECK-NEXT:    bne .LBB1_10
-; CHECK-NEXT:    b .LBB1_13
-; CHECK-NEXT:  .LBB1_11: @ %vector.body.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB1_8 Depth=2
+; CHECK-NEXT:    bne .LBB1_8
+; CHECK-NEXT:  .LBB1_9: @ %for.cond9.for.cond15.preheader_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB1_11 Depth=2
+; CHECK-NEXT:    cbnz r6, .LBB1_14
+; CHECK-NEXT:  .LBB1_10: @ %for.cond.cleanup17.us
+; CHECK-NEXT:    @ in Loop: Header=BB1_11 Depth=2
+; CHECK-NEXT:    add.w r11, r3, #7
+; CHECK-NEXT:    cmn.w r3, #4
+; CHECK-NEXT:    mov.w r10, #0
+; CHECK-NEXT:    mov r3, r11
+; CHECK-NEXT:    bge .LBB1_3
+; CHECK-NEXT:  .LBB1_11: @ %for.body6.us
+; CHECK-NEXT:    @ Parent Loop BB1_4 Depth=1
+; CHECK-NEXT:    @ => This Loop Header: Depth=2
+; CHECK-NEXT:    @ Child Loop BB1_8 Depth 3
+; CHECK-NEXT:    @ Child Loop BB1_13 Depth 3
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    bne .LBB1_7
+; CHECK-NEXT:  @ %bb.12: @ %vector.body.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    mov r4, r7
 ; CHECK-NEXT:    vmov q1, q4
-; CHECK-NEXT:  .LBB1_12: @ %vector.body
+; CHECK-NEXT:  .LBB1_13: @ %vector.body
 ; CHECK-NEXT:    @ Parent Loop BB1_4 Depth=1
-; CHECK-NEXT:    @ Parent Loop BB1_8 Depth=2
+; CHECK-NEXT:    @ Parent Loop BB1_11 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vqadd.u32 q2, q5, r1
 ; CHECK-NEXT:    subs r4, #4
@@ -508,16 +511,13 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
 ; CHECK-NEXT:    vadd.i32 q1, q1, r9
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vstrwt.32 q0, [q2]
-; CHECK-NEXT:    bne .LBB1_12
-; CHECK-NEXT:  .LBB1_13: @ %for.cond9.for.cond15.preheader_crit_edge.us
-; CHECK-NEXT:    @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT:    cmp r6, #0
-; CHECK-NEXT:    beq .LBB1_7
-; CHECK-NEXT:  @ %bb.14: @ %for.cond9.for.cond15.preheader_crit_edge.us
-; CHECK-NEXT:    @ in Loop: Header=BB1_8 Depth=2
+; CHECK-NEXT:    bne .LBB1_13
+; CHECK-NEXT:    b .LBB1_9
+; CHECK-NEXT:  .LBB1_14: @ %for.cond9.for.cond15.preheader_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    eor r1, r10, #1
 ; CHECK-NEXT:    lsls r1, r1, #31
-; CHECK-NEXT:    bne .LBB1_7
+; CHECK-NEXT:    bne .LBB1_10
 ; CHECK-NEXT:    b .LBB1_26
 ; CHECK-NEXT:  .LBB1_15: @ %for.body6.lr.ph.split
 ; CHECK-NEXT:    @ in Loop: Header=BB1_4 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index 88131fcf21a9233..51122ddfda63416 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1021,24 +1021,39 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    str r4, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT:    b .LBB16_5
-; CHECK-NEXT:  .LBB16_3: @ %for.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    b .LBB16_8
+; CHECK-NEXT:  .LBB16_3: @ %while.body76.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:  .LBB16_4: @ %while.body76
+; CHECK-NEXT:    @ Parent Loop BB16_8 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ldrh r4, [r6], #2
+; CHECK-NEXT:    vldrh.u16 q1, [r0], #2
+; CHECK-NEXT:    vfma.f16 q0, q1, r4
+; CHECK-NEXT:    le lr, .LBB16_4
+; CHECK-NEXT:  @ %bb.5: @ %while.end.loopexit
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    wls lr, r0, .LBB16_4
-; CHECK-NEXT:    b .LBB16_9
-; CHECK-NEXT:  .LBB16_4: @ %while.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    add.w r5, r5, r0, lsl #1
+; CHECK-NEXT:    b .LBB16_7
+; CHECK-NEXT:  .LBB16_6: @ %for.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    wls lr, r0, .LBB16_7
+; CHECK-NEXT:    b .LBB16_3
+; CHECK-NEXT:  .LBB16_7: @ %while.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    subs.w r12, r12, #1
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #8
 ; CHECK-NEXT:    add.w r0, r5, r0, lsl #1
 ; CHECK-NEXT:    add.w r5, r0, #8
-; CHECK-NEXT:    beq.w .LBB16_12
-; CHECK-NEXT:  .LBB16_5: @ %while.body
+; CHECK-NEXT:    beq .LBB16_12
+; CHECK-NEXT:  .LBB16_8: @ %while.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB16_7 Depth 2
 ; CHECK-NEXT:    @ Child Loop BB16_10 Depth 2
+; CHECK-NEXT:    @ Child Loop BB16_4 Depth 2
 ; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    ldrh.w lr, [r3, #14]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0], #8
@@ -1074,14 +1089,14 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vfma.f16 q0, q1, lr
 ; CHECK-NEXT:    cmp r0, #16
-; CHECK-NEXT:    blo .LBB16_8
-; CHECK-NEXT:  @ %bb.6: @ %for.body.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    blo .LBB16_11
+; CHECK-NEXT:  @ %bb.9: @ %for.body.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    dls lr, r0
 ; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_7: @ %for.body
-; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
+; CHECK-NEXT:  .LBB16_10: @ %for.body
+; CHECK-NEXT:    @ Parent Loop BB16_8 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldrh r0, [r6], #16
 ; CHECK-NEXT:    vldrw.u32 q1, [r5]
@@ -1112,26 +1127,11 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
 ; CHECK-NEXT:    adds r5, #16
 ; CHECK-NEXT:    vfma.f16 q0, q1, r4
-; CHECK-NEXT:    le lr, .LBB16_7
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    mov r0, r5
-; CHECK-NEXT:  .LBB16_10: @ %while.body76
-; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    ldrh r4, [r6], #2
-; CHECK-NEXT:    vldrh.u16 q1, [r0], #2
-; CHECK-NEXT:    vfma.f16 q0, q1, r4
 ; CHECK-NEXT:    le lr, .LBB16_10
-; CHECK-NEXT:  @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    add.w r5, r5, r0, lsl #1
-; CHECK-NEXT:    b .LBB16_4
+; CHECK-NEXT:    b .LBB16_6
+; CHECK-NEXT:  .LBB16_11: @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    b .LBB16_6
 ; CHECK-NEXT:  .LBB16_12: @ %if.end
 ; CHECK-NEXT:    add sp, #24
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index ca6b8c2fffa22cc..f7fbff39162e0d2 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1016,25 +1016,40 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    str r6, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    b .LBB16_5
-; CHECK-NEXT:  .LBB16_3: @ %for.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    b .LBB16_8
+; CHECK-NEXT:  .LBB16_3: @ %while.body76.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    mov r3, r4
+; CHECK-NEXT:  .LBB16_4: @ %while.body76
+; CHECK-NEXT:    @ Parent Loop BB16_8 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ldr r0, [r7], #4
+; CHECK-NEXT:    vldrw.u32 q1, [r3], #4
+; CHECK-NEXT:    vfma.f32 q0, q1, r0
+; CHECK-NEXT:    le lr, .LBB16_4
+; CHECK-NEXT:  @ %bb.5: @ %while.end.loopexit
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    add.w r4, r4, r0, lsl #2
+; CHECK-NEXT:    b .LBB16_7
+; CHECK-NEXT:  .LBB16_6: @ %for.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
 ; CHECK-NEXT:    ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload
-; CHECK-NEXT:    wls lr, r0, .LBB16_4
-; CHECK-NEXT:    b .LBB16_9
-; CHECK-NEXT:  .LBB16_4: @ %while.end
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    wls lr, r0, .LBB16_7
+; CHECK-NEXT:    b .LBB16_3
+; CHECK-NEXT:  .LBB16_7: @ %while.end
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    subs.w r12, r12, #1
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #16
 ; CHECK-NEXT:    add.w r0, r4, r0, lsl #2
 ; CHECK-NEXT:    add.w r4, r0, #16
 ; CHECK-NEXT:    beq .LBB16_12
-; CHECK-NEXT:  .LBB16_5: @ %while.body
+; CHECK-NEXT:  .LBB16_8: @ %while.body
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB16_7 Depth 2
 ; CHECK-NEXT:    @ Child Loop BB16_10 Depth 2
+; CHECK-NEXT:    @ Child Loop BB16_4 Depth 2
 ; CHECK-NEXT:    add.w lr, r10, #8
 ; CHECK-NEXT:    vldrw.u32 q0, [r1], #16
 ; CHECK-NEXT:    ldrd r3, r7, [r10]
@@ -1060,14 +1075,14 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    vfma.f32 q0, q3, r11
 ; CHECK-NEXT:    cmp r0, #16
 ; CHECK-NEXT:    vfma.f32 q0, q1, r8
-; CHECK-NEXT:    blo .LBB16_8
-; CHECK-NEXT:  @ %bb.6: @ %for.body.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
+; CHECK-NEXT:    blo .LBB16_11
+; CHECK-NEXT:  @ %bb.9: @ %for.body.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB16_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    dls lr, r0
 ; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:  .LBB16_7: @ %for.body
-; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
+; CHECK-NEXT:  .LBB16_10: @ %for.body
+; CHECK-NEXT:    @ Parent Loop BB16_8 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldm.w r7, {r0, r3, r5, r6, r8, r11}
 ; CHECK-NEXT:    vldrw.u32 q1, [r4], #32
@@ -1088,26 +1103,11 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no
 ; CHECK-NEXT:    vfma.f32 q0, q2, r11
 ; CHECK-NEXT:    vfma.f32 q0, q3, r9
 ; CHECK-NEXT:    vfma.f32 q0, q1, r1
-; CHECK-NEXT:    le lr, .LBB16_7
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_8: @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    b .LBB16_3
-; CHECK-NEXT:  .LBB16_9: @ %while.body76.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    mov r3, r4
-; CHECK-NEXT:  .LBB16_10: @ %while.body76
-; CHECK-NEXT:    @ Parent Loop BB16_5 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    ldr r0, [r7], #4
-; CHECK-NEXT:    vldrw.u32 q1, [r3], #4
-; CHECK-NEXT:    vfma.f32 q0, q1, r0
 ; CHECK-NEXT:    le lr, .LBB16_10
-; CHECK-NEXT:  @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT:    @ in Loop: Header=BB16_5 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    add.w r4, r4, r0, lsl #2
-; CHECK-NEXT:    b .LBB16_4
+; CHECK-NEXT:    b .LBB16_6
+; CHECK-NEXT:  .LBB16_11: @ in Loop: Header=BB16_8 Depth=1
+; CHECK-NEXT:    ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    b .LBB16_6
 ; CHECK-NEXT:  .LBB16_12:
 ; CHECK-NEXT:    add sp, #32
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
@@ -1582,36 +1582,12 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
 ; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    b .LBB19_3
-; CHECK-NEXT:  .LBB19_1: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT:    mov r3, r8
-; CHECK-NEXT:    mov r7, r5
-; CHECK-NEXT:    mov r4, r11
-; CHECK-NEXT:    mov r8, r10
-; CHECK-NEXT:  .LBB19_2: @ %if.end69
-; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT:    ldrd r2, r6, [sp, #8] @ 8-byte Folded Reload
-; CHECK-NEXT:    adds r0, #128
-; CHECK-NEXT:    strd r7, r4, [r9]
-; CHECK-NEXT:    subs r6, #1
-; CHECK-NEXT:    strd r3, r8, [r9, #8]
-; CHECK-NEXT:    add.w r9, r9, #16
-; CHECK-NEXT:    mov r1, r2
-; CHECK-NEXT:    beq.w .LBB19_13
-; CHECK-NEXT:  .LBB19_3: @ %do.body
-; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB19_5 Depth 2
-; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    mov r6, r2
-; CHECK-NEXT:    ldrd r5, r11, [r9]
-; CHECK-NEXT:    ldrd r8, r10, [r9, #8]
-; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
-; CHECK-NEXT:    wls lr, r2, .LBB19_6
-; CHECK-NEXT:  @ %bb.4: @ %while.body.lr.ph
+; CHECK-NEXT:  .LBB19_1: @ %while.body.lr.ph
 ; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
 ; CHECK-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    mov r4, r11
 ; CHECK-NEXT:    mov r3, r5
-; CHECK-NEXT:  .LBB19_5: @ %while.body
+; CHECK-NEXT:  .LBB19_2: @ %while.body
 ; CHECK-NEXT:    @ Parent Loop BB19_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldr r5, [r1, #12]
@@ -1638,13 +1614,39 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
 ; CHECK-NEXT:    vstrb.8 q2, [r6], #16
 ; CHECK-NEXT:    mov r3, r5
 ; CHECK-NEXT:    mov r12, r5
-; CHECK-NEXT:    le lr, .LBB19_5
-; CHECK-NEXT:  .LBB19_6: @ %while.end
+; CHECK-NEXT:    le lr, .LBB19_2
+; CHECK-NEXT:    b .LBB19_4
+; CHECK-NEXT:  .LBB19_3: @ %do.body
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB19_2 Depth 2
+; CHECK-NEXT:    str r6, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    mov r6, r2
+; CHECK-NEXT:    ldrd r5, r11, [r9]
+; CHECK-NEXT:    ldrd r8, r10, [r9, #8]
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    wls lr, r2, .LBB19_4
+; CHECK-NEXT:    b .LBB19_1
+; CHECK-NEXT:  .LBB19_4: @ %while.end
 ; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
 ; CHECK-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    beq .LBB19_1
-; CHECK-NEXT:  @ %bb.7: @ %if.then
+; CHECK-NEXT:    cbnz r2, .LBB19_7
+; CHECK-NEXT:  @ %bb.5: @ in Loop: Header=BB19_3 Depth=1
+; CHECK-NEXT:    mov r3, r8
+; CHECK-NEXT:    mov r7, r5
+; CHECK-NEXT:    mov r4, r11
+; CHECK-NEXT:    mov r8, r10
+; CHECK-NEXT:  .LBB19_6: @ %if.end69
+; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
+; CHECK-NEXT:    ldrd r2, r6, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT:    adds r0, #128
+; CHECK-NEXT:    strd r7, r4, [r9]
+; CHECK-NEXT:    subs r6, #1
+; CHECK-NEXT:    strd r3, r8, [r9, #8]
+; CHECK-NEXT:    add.w r9, r9, #16
+; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    beq .LBB19_13
+; CHECK-NEXT:    b .LBB19_3
+; CHECK-NEXT:  .LBB19_7: @ %if.then
 ; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
 ; CHECK-NEXT:    ldrd lr, r4, [r1]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0]
@@ -1694,7 +1696,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
 ; CHECK-NEXT:  .LBB19_12: @ %if.end69
 ; CHECK-NEXT:    @ in Loop: Header=BB19_3 Depth=1
 ; CHECK-NEXT:    mov r12, r1
-; CHECK-NEXT:    b .LBB19_2
+; CHECK-NEXT:    b .LBB19_6
 ; CHECK-NEXT:  .LBB19_13: @ %do.end
 ; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
index a89d3522ca5f27f..f997e8cf824516a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
@@ -613,13 +613,8 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
 ; CHECK-NEXT:    itt ne
 ; CHECK-NEXT:    ldrne r0, [sp, #136]
 ; CHECK-NEXT:    cmpne r0, #0
-; CHECK-NEXT:    bne .LBB10_2
-; CHECK-NEXT:  .LBB10_1: @ %for.cond.cleanup
-; CHECK-NEXT:    add sp, #32
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-; CHECK-NEXT:  .LBB10_2: @ %for.cond1.preheader.us.preheader
+; CHECK-NEXT:    beq.w .LBB10_15
+; CHECK-NEXT:  @ %bb.1: @ %for.cond1.preheader.us.preheader
 ; CHECK-NEXT:    ldr.w r12, [sp, #140]
 ; CHECK-NEXT:    movs r7, #1
 ; CHECK-NEXT:    mov.w r11, #0
@@ -639,69 +634,18 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
 ; CHECK-NEXT:    vshl.i32 q3, q1, #2
 ; CHECK-NEXT:    movs r3, #0
 ; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    b .LBB10_5
-; CHECK-NEXT:  .LBB10_3: @ %for.cond5.preheader.us73.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB10_5 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    add.w r3, r0, r5, lsl #1
-; CHECK-NEXT:    wlstp.8 lr, r6, .LBB10_4
-; CHECK-NEXT:    b .LBB10_15
-; CHECK-NEXT:  .LBB10_4: @ %for.cond1.for.cond.cleanup3_crit_edge.us
-; CHECK-NEXT:    @ in Loop: Header=BB10_5 Depth=1
-; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    add r11, r12
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    add r3, r0
-; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    adds r3, #1
-; CHECK-NEXT:    cmp r3, r0
-; CHECK-NEXT:    beq .LBB10_1
-; CHECK-NEXT:  .LBB10_5: @ %for.cond1.preheader.us
-; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB10_8 Depth 2
-; CHECK-NEXT:    @ Child Loop BB10_11 Depth 3
-; CHECK-NEXT:    @ Child Loop BB10_14 Depth 3
-; CHECK-NEXT:    @ Child Loop BB10_15 Depth 2
-; CHECK-NEXT:    mul r5, r3, r7
-; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    beq .LBB10_3
-; CHECK-NEXT:  @ %bb.6: @ %for.cond5.preheader.us.us.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB10_5 Depth=1
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    b .LBB10_8
-; CHECK-NEXT:  .LBB10_7: @ %for.cond5.for.cond.cleanup7_crit_edge.us.us
-; CHECK-NEXT:    @ in Loop: Header=BB10_8 Depth=2
-; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    add.w r0, r8, r5
-; CHECK-NEXT:    add.w r8, r8, #1
-; CHECK-NEXT:    cmp r8, r7
-; CHECK-NEXT:    strh.w r10, [r3, r0, lsl #1]
-; CHECK-NEXT:    beq .LBB10_4
-; CHECK-NEXT:  .LBB10_8: @ %for.cond5.preheader.us.us
-; CHECK-NEXT:    @ Parent Loop BB10_5 Depth=1
-; CHECK-NEXT:    @ => This Loop Header: Depth=2
-; CHECK-NEXT:    @ Child Loop BB10_11 Depth 3
-; CHECK-NEXT:    @ Child Loop BB10_14 Depth 3
-; CHECK-NEXT:    cmp.w r12, #3
-; CHECK-NEXT:    bhi .LBB10_10
-; CHECK-NEXT:  @ %bb.9: @ in Loop: Header=BB10_8 Depth=2
-; CHECK-NEXT:    movs r4, #0
-; CHECK-NEXT:    mov.w r10, #0
-; CHECK-NEXT:    b .LBB10_13
-; CHECK-NEXT:  .LBB10_10: @ %vector.ph
-; CHECK-NEXT:    @ in Loop: Header=BB10_8 Depth=2
+; CHECK-NEXT:    b .LBB10_10
+; CHECK-NEXT:  .LBB10_2: @ %vector.ph
+; CHECK-NEXT:    @ in Loop: Header=BB10_6 Depth=2
 ; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vmov.i32 q4, #0x0
 ; CHECK-NEXT:    vmlas.i32 q5, q2, r8
 ; CHECK-NEXT:    dls lr, r0
 ; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:  .LBB10_11: @ %vector.body
-; CHECK-NEXT:    @ Parent Loop BB10_5 Depth=1
-; CHECK-NEXT:    @ Parent Loop BB10_8 Depth=2
+; CHECK-NEXT:  .LBB10_3: @ %vector.body
+; CHECK-NEXT:    @ Parent Loop BB10_10 Depth=1
+; CHECK-NEXT:    @ Parent Loop BB10_6 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vadd.i32 q6, q5, q3
 ; CHECK-NEXT:    vldrh.s32 q7, [r1, q5, uxtw #1]
@@ -709,15 +653,33 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
 ; CHECK-NEXT:    vmul.i32 q5, q7, q5
 ; CHECK-NEXT:    vadd.i32 q4, q5, q4
 ; CHECK-NEXT:    vmov q5, q6
-; CHECK-NEXT:    le lr, .LBB10_11
-; CHECK-NEXT:  @ %bb.12: @ %middle.block
-; CHECK-NEXT:    @ in Loop: Header=BB10_8 Depth=2
+; CHECK-NEXT:    le lr, .LBB10_3
+; CHECK-NEXT:  @ %bb.4: @ %middle.block
+; CHECK-NEXT:    @ in Loop: Header=BB10_6 Depth=2
 ; CHECK-NEXT:    vaddv.u32 r10, q4
 ; CHECK-NEXT:    cmp r2, r12
 ; CHECK-NEXT:    mov r4, r2
-; CHECK-NEXT:    beq .LBB10_7
-; CHECK-NEXT:  .LBB10_13: @ %for.body8.us.us.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB10_8 Depth=2
+; CHECK-NEXT:    bne .LBB10_8
+; CHECK-NEXT:  .LBB10_5: @ %for.cond5.for.cond.cleanup7_crit_edge.us.us
+; CHECK-NEXT:    @ in Loop: Header=BB10_6 Depth=2
+; CHECK-NEXT:    ldr r3, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    add.w r0, r8, r5
+; CHECK-NEXT:    add.w r8, r8, #1
+; CHECK-NEXT:    cmp r8, r7
+; CHECK-NEXT:    strh.w r10, [r3, r0, lsl #1]
+; CHECK-NEXT:    beq .LBB10_14
+; CHECK-NEXT:  .LBB10_6: @ %for.cond5.preheader.us.us
+; CHECK-NEXT:    @ Parent Loop BB10_10 Depth=1
+; CHECK-NEXT:    @ => This Loop Header: Depth=2
+; CHECK-NEXT:    @ Child Loop BB10_3 Depth 3
+; CHECK-NEXT:    @ Child Loop BB10_9 Depth 3
+; CHECK-NEXT:    cmp.w r12, #3
+; CHECK-NEXT:    bhi .LBB10_2
+; CHECK-NEXT:  @ %bb.7: @ in Loop: Header=BB10_6 Depth=2
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    mov.w r10, #0
+; CHECK-NEXT:  .LBB10_8: @ %for.body8.us.us.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB10_6 Depth=2
 ; CHECK-NEXT:    mla r3, r7, r4, r8
 ; CHECK-NEXT:    add.w r0, r11, r4
 ; CHECK-NEXT:    ldr r7, [sp, #24] @ 4-byte Reload
@@ -725,21 +687,56 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16*
 ; CHECK-NEXT:    add.w r9, r7, r0, lsl #1
 ; CHECK-NEXT:    ldr r7, [sp, #136]
 ; CHECK-NEXT:    add.w r3, r1, r3, lsl #1
-; CHECK-NEXT:  .LBB10_14: @ %for.body8.us.us
-; CHECK-NEXT:    @ Parent Loop BB10_5 Depth=1
-; CHECK-NEXT:    @ Parent Loop BB10_8 Depth=2
+; CHECK-NEXT:  .LBB10_9: @ %for.body8.us.us
+; CHECK-NEXT:    @ Parent Loop BB10_10 Depth=1
+; CHECK-NEXT:    @ Parent Loop BB10_6 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    ldrsh.w r4, [r3]
 ; CHECK-NEXT:    add r3, r6
 ; CHECK-NEXT:    ldrsh r0, [r9], #2
 ; CHECK-NEXT:    smlabb r10, r4, r0, r10
-; CHECK-NEXT:    le lr, .LBB10_14
-; CHECK-NEXT:    b .LBB10_7
-; CHECK-NEXT:  .LBB10_15: @ Parent Loop BB10_5 Depth=1
+; CHECK-NEXT:    le lr, .LBB10_9
+; CHECK-NEXT:    b .LBB10_5
+; CHECK-NEXT:  .LBB10_10: @ %for.cond1.preheader.us
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB10_6 Depth 2
+; CHECK-NEXT:    @ Child Loop BB10_3 Depth 3
+; CHECK-NEXT:    @ Child Loop BB10_9 Depth 3
+; CHECK-NEXT:    @ Child Loop BB10_13 Depth 2
+; CHECK-NEXT:    mul r5, r3, r7
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    beq .LBB10_12
+; CHECK-NEXT:  @ %bb.11: @ %for.cond5.preheader.us.us.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB10_10 Depth=1
+; CHECK-NEXT:    mov.w r8, #0
+; CHECK-NEXT:    b .LBB10_6
+; CHECK-NEXT:  .LBB10_12: @ %for.cond5.preheader.us73.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB10_10 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    add.w r3, r0, r5, lsl #1
+; CHECK-NEXT:    wlstp.8 lr, r6, .LBB10_14
+; CHECK-NEXT:  .LBB10_13: @ Parent Loop BB10_10 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vstrb.8 q0, [r3], #16
-; CHECK-NEXT:    letp lr, .LBB10_15
-; CHECK-NEXT:    b .LBB10_4
+; CHECK-NEXT:    letp lr, .LBB10_13
+; CHECK-NEXT:  .LBB10_14: @ %for.cond1.for.cond.cleanup3_crit_edge.us
+; CHECK-NEXT:    @ in Loop: Header=BB10_10 Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    add r11, r12
+; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    add r3, r0
+; CHECK-NEXT:    str r3, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    ldr r3, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    adds r3, #1
+; CHECK-NEXT:    cmp r3, r0
+; CHECK-NEXT:    bne .LBB10_10
+; CHECK-NEXT:  .LBB10_15: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.16:
 ; CHECK-NEXT:  .LCPI10_0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
index b5c9b903e184168..e290f64e208e900 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll
@@ -22,31 +22,28 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
 ; CHECK-NEXT:    vmov.i32 q1, #0x0
 ; CHECK-NEXT:    vmov.i32 q2, #0x0
 ; CHECK-NEXT:    vmov.i32 q3, #0x0
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:  .LBB0_2: @ %land.end.us.3
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    movs r3, #1
-; CHECK-NEXT:  .LBB0_3: @ %for.body.us
-; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
-; CHECK-NEXT:    @ Child Loop BB0_6 Depth 2
-; CHECK-NEXT:    @ Child Loop BB0_8 Depth 2
-; CHECK-NEXT:    @ Child Loop BB0_11 Depth 2
-; CHECK-NEXT:    ldr.w r0, [r2, r3, lsl #2]
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_2: @ Parent Loop BB0_8 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_2
+; CHECK-NEXT:  .LBB0_3: @ %land.end.us.1
+; CHECK-NEXT:    @ in Loop: Header=BB0_8 Depth=1
+; CHECK-NEXT:    ldr r0, [r2, #4]
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ite ne
-; CHECK-NEXT:    ldrbne r0, [r1, r3]
+; CHECK-NEXT:    ldrbne r0, [r1, #1]
 ; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    mla r3, r3, r12, r5
-; CHECK-NEXT:    add r3, r0
+; CHECK-NEXT:    adds r3, r5, r0
 ; CHECK-NEXT:    rsb.w r0, r0, #108
+; CHECK-NEXT:    adds r3, #19
 ; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_5
-; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    vstrb.8 q2, [r3], #16
 ; CHECK-NEXT:    letp lr, .LBB0_4
-; CHECK-NEXT:  .LBB0_5: @ %land.end.us
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_5: @ %land.end.us.2
+; CHECK-NEXT:    @ in Loop: Header=BB0_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [r2, #4]
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ite ne
@@ -56,27 +53,34 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
 ; CHECK-NEXT:    rsb.w r0, r0, #108
 ; CHECK-NEXT:    adds r3, #19
 ; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_7
-; CHECK-NEXT:  .LBB0_6: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_6: @ Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q1, [r3], #16
+; CHECK-NEXT:    vstrb.8 q3, [r3], #16
 ; CHECK-NEXT:    letp lr, .LBB0_6
-; CHECK-NEXT:  .LBB0_7: @ %land.end.us.1
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    ldr r0, [r2, #4]
+; CHECK-NEXT:  .LBB0_7: @ %land.end.us.3
+; CHECK-NEXT:    @ in Loop: Header=BB0_8 Depth=1
+; CHECK-NEXT:    movs r3, #1
+; CHECK-NEXT:  .LBB0_8: @ %for.body.us
+; CHECK-NEXT:    @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB0_9 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    @ Child Loop BB0_6 Depth 2
+; CHECK-NEXT:    ldr.w r0, [r2, r3, lsl #2]
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ite ne
-; CHECK-NEXT:    ldrbne r0, [r1, #1]
+; CHECK-NEXT:    ldrbne r0, [r1, r3]
 ; CHECK-NEXT:    moveq r0, #0
-; CHECK-NEXT:    adds r3, r5, r0
+; CHECK-NEXT:    mla r3, r3, r12, r5
+; CHECK-NEXT:    add r3, r0
 ; CHECK-NEXT:    rsb.w r0, r0, #108
-; CHECK-NEXT:    adds r3, #19
-; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_9
-; CHECK-NEXT:  .LBB0_8: @ Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB0_10
+; CHECK-NEXT:  .LBB0_9: @ Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q2, [r3], #16
-; CHECK-NEXT:    letp lr, .LBB0_8
-; CHECK-NEXT:  .LBB0_9: @ %land.end.us.2
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    vstrb.8 q0, [r3], #16
+; CHECK-NEXT:    letp lr, .LBB0_9
+; CHECK-NEXT:  .LBB0_10: @ %land.end.us
+; CHECK-NEXT:    @ in Loop: Header=BB0_8 Depth=1
 ; CHECK-NEXT:    ldr r0, [r2, #4]
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    ite ne
@@ -88,14 +92,10 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly %
 ; CHECK-NEXT:    adds r3, #19
 ; CHECK-NEXT:    lsrs r4, r4, #4
 ; CHECK-NEXT:    cmp.w r4, #0
-; CHECK-NEXT:    beq .LBB0_2
-; CHECK-NEXT:  @ %bb.10: @ %land.end.us.2
-; CHECK-NEXT:    @ in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    beq .LBB0_3
+; CHECK-NEXT:  @ %bb.11: @ %land.end.us
+; CHECK-NEXT:    @ in Loop: Header=BB0_8 Depth=1
 ; CHECK-NEXT:    dlstp.8 lr, r0
-; CHECK-NEXT:  .LBB0_11: @ Parent Loop BB0_3 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q3, [r3], #16
-; CHECK-NEXT:    letp lr, .LBB0_11
 ; CHECK-NEXT:    b .LBB0_2
 ; CHECK-NEXT:  .LBB0_12:
 ; CHECK-NEXT:    movw r12, :lower16:arr_183
diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
index da59cb259db6168..3fc7162c3c5e979 100644
--- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll
@@ -450,56 +450,52 @@ define void @multilooped_exit(i32 %b) {
 ; CHECK-NEXT:    mov.w r4, #-1
 ; CHECK-NEXT:    vmov.i32 q0, #0x0
 ; CHECK-NEXT:    b .LBB18_3
-; CHECK-NEXT:  .LBB18_2: @ %loop
-; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
-; CHECK-NEXT:    adds r4, #1
-; CHECK-NEXT:    cmp.w r4, #1024
-; CHECK-NEXT:    bge .LBB18_12
+; CHECK-NEXT:  .LBB18_2: @ Parent Loop BB18_3 Depth=1
+; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    letp lr, .LBB18_2
+; CHECK-NEXT:    b .LBB18_4
 ; CHECK-NEXT:  .LBB18_3: @ %loop
 ; CHECK-NEXT:    @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB18_4 Depth 2
-; CHECK-NEXT:    @ Child Loop BB18_6 Depth 2
-; CHECK-NEXT:    @ Child Loop BB18_8 Depth 2
-; CHECK-NEXT:    @ Child Loop BB18_11 Depth 2
+; CHECK-NEXT:    @ Child Loop BB18_2 Depth 2
+; CHECK-NEXT:    @ Child Loop BB18_5 Depth 2
+; CHECK-NEXT:    @ Child Loop BB18_7 Depth 2
+; CHECK-NEXT:    @ Child Loop BB18_9 Depth 2
 ; CHECK-NEXT:    movw r3, :lower16:arr_56
-; CHECK-NEXT:    add.w r1, r0, #15
 ; CHECK-NEXT:    movt r3, :upper16:arr_56
-; CHECK-NEXT:    lsr.w r12, r1, #4
 ; CHECK-NEXT:    mov r2, r3
-; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_5
-; CHECK-NEXT:  .LBB18_4: @ Parent Loop BB18_3 Depth=1
-; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    vstrb.8 q0, [r2], #16
-; CHECK-NEXT:    letp lr, .LBB18_4
-; CHECK-NEXT:  .LBB18_5: @ %loop
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_4
+; CHECK-NEXT:    b .LBB18_2
+; CHECK-NEXT:  .LBB18_4: @ %loop
 ; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
 ; CHECK-NEXT:    mov r2, r3
-; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_7
-; CHECK-NEXT:  .LBB18_6: @ Parent Loop BB18_3 Depth=1
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_6
+; CHECK-NEXT:  .LBB18_5: @ Parent Loop BB18_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #16
-; CHECK-NEXT:    letp lr, .LBB18_6
-; CHECK-NEXT:  .LBB18_7: @ %loop
+; CHECK-NEXT:    letp lr, .LBB18_5
+; CHECK-NEXT:  .LBB18_6: @ %loop
 ; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
 ; CHECK-NEXT:    mov r2, r3
-; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_9
-; CHECK-NEXT:  .LBB18_8: @ Parent Loop BB18_3 Depth=1
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_8
+; CHECK-NEXT:  .LBB18_7: @ Parent Loop BB18_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vstrb.8 q0, [r2], #16
-; CHECK-NEXT:    letp lr, .LBB18_8
-; CHECK-NEXT:  .LBB18_9: @ %loop
-; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
-; CHECK-NEXT:    cmp.w r12, #0
-; CHECK-NEXT:    beq .LBB18_2
-; CHECK-NEXT:  @ %bb.10: @ %loop
+; CHECK-NEXT:    letp lr, .LBB18_7
+; CHECK-NEXT:  .LBB18_8: @ %loop
 ; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
-; CHECK-NEXT:    dlstp.8 lr, r0
-; CHECK-NEXT:  .LBB18_11: @ Parent Loop BB18_3 Depth=1
+; CHECK-NEXT:    wlstp.8 lr, r0, .LBB18_10
+; CHECK-NEXT:  .LBB18_9: @ Parent Loop BB18_3 Depth=1
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vstrb.8 q0, [r3], #16
-; CHECK-NEXT:    letp lr, .LBB18_11
-; CHECK-NEXT:    b .LBB18_2
-; CHECK-NEXT:  .LBB18_12:
+; CHECK-NEXT:    letp lr, .LBB18_9
+; CHECK-NEXT:  .LBB18_10: @ %loop
+; CHECK-NEXT:    @ in Loop: Header=BB18_3 Depth=1
+; CHECK-NEXT:    adds r4, #1
+; CHECK-NEXT:    cmp.w r4, #1024
+; CHECK-NEXT:    bge .LBB18_11
+; CHECK-NEXT:    b .LBB18_3
+; CHECK-NEXT:  .LBB18_11:
 ; CHECK-NEXT:    pop.w {r4, lr}
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index 2aa183c31bab595..db209931c29a6c0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -1073,37 +1073,10 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    mov.w r12, #1
 ; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    lsrs r1, r2, #2
-; CHECK-NEXT:    b .LBB7_3
-; CHECK-NEXT:  .LBB7_2: @ in Loop: Header=BB7_3 Depth=1
-; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT:    add.w r11, r11, #1
-; CHECK-NEXT:    lsl.w r12, r12, #2
-; CHECK-NEXT:    cmp r2, #7
-; CHECK-NEXT:    asr.w r1, r2, #2
-; CHECK-NEXT:    ble .LBB7_9
-; CHECK-NEXT:  .LBB7_3: @ =>This Loop Header: Depth=1
-; CHECK-NEXT:    @ Child Loop BB7_6 Depth 2
-; CHECK-NEXT:    @ Child Loop BB7_7 Depth 3
-; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    cmp.w r12, #1
-; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    lsr.w r2, r1, #2
-; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT:    blt .LBB7_2
-; CHECK-NEXT:  @ %bb.4: @ in Loop: Header=BB7_3 Depth=1
-; CHECK-NEXT:    lsrs r2, r1, #3
-; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT:    beq .LBB7_2
-; CHECK-NEXT:  @ %bb.5: @ %.preheader
-; CHECK-NEXT:    @ in Loop: Header=BB7_3 Depth=1
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT:    lsls r1, r1, #1
-; CHECK-NEXT:    movs r4, #0
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    lsl.w r10, r2, #1
-; CHECK-NEXT:  .LBB7_6: @ Parent Loop BB7_3 Depth=1
+; CHECK-NEXT:    b .LBB7_6
+; CHECK-NEXT:  .LBB7_2: @ Parent Loop BB7_6 Depth=1
 ; CHECK-NEXT:    @ => This Loop Header: Depth=2
-; CHECK-NEXT:    @ Child Loop BB7_7 Depth 3
+; CHECK-NEXT:    @ Child Loop BB7_3 Depth 3
 ; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    ldrd lr, r2, [r0, #16]
 ; CHECK-NEXT:    ldrd r3, r8, [r0, #24]
@@ -1122,8 +1095,8 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    add.w r8, r2, r10, lsl #2
 ; CHECK-NEXT:    add.w r9, r8, r10, lsl #2
 ; CHECK-NEXT:    dls lr, r6
-; CHECK-NEXT:  .LBB7_7: @ Parent Loop BB7_3 Depth=1
-; CHECK-NEXT:    @ Parent Loop BB7_6 Depth=2
+; CHECK-NEXT:  .LBB7_3: @ Parent Loop BB7_6 Depth=1
+; CHECK-NEXT:    @ Parent Loop BB7_2 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    vldrw.u32 q3, [r9]
 ; CHECK-NEXT:    vldrw.u32 q4, [r2]
@@ -1150,11 +1123,38 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
 ; CHECK-NEXT:    vcmla.f32 q2, q0, q1, #90
 ; CHECK-NEXT:    vstrb.8 q2, [r9], #16
-; CHECK-NEXT:    le lr, .LBB7_7
-; CHECK-NEXT:  @ %bb.8: @ in Loop: Header=BB7_6 Depth=2
+; CHECK-NEXT:    le lr, .LBB7_3
+; CHECK-NEXT:  @ %bb.4: @ in Loop: Header=BB7_2 Depth=2
 ; CHECK-NEXT:    adds r4, #1
 ; CHECK-NEXT:    cmp r4, r12
-; CHECK-NEXT:    bne .LBB7_6
+; CHECK-NEXT:    bne .LBB7_2
+; CHECK-NEXT:  .LBB7_5: @ in Loop: Header=BB7_6 Depth=1
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    add.w r11, r11, #1
+; CHECK-NEXT:    lsl.w r12, r12, #2
+; CHECK-NEXT:    cmp r2, #7
+; CHECK-NEXT:    asr.w r1, r2, #2
+; CHECK-NEXT:    ble .LBB7_9
+; CHECK-NEXT:  .LBB7_6: @ =>This Loop Header: Depth=1
+; CHECK-NEXT:    @ Child Loop BB7_2 Depth 2
+; CHECK-NEXT:    @ Child Loop BB7_3 Depth 3
+; CHECK-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    lsr.w r2, r1, #2
+; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    blt .LBB7_5
+; CHECK-NEXT:  @ %bb.7: @ in Loop: Header=BB7_6 Depth=1
+; CHECK-NEXT:    lsrs r2, r1, #3
+; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    beq .LBB7_5
+; CHECK-NEXT:  @ %bb.8: @ %.preheader
+; CHECK-NEXT:    @ in Loop: Header=BB7_6 Depth=1
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    lsls r1, r1, #1
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    lsl.w r10, r2, #1
 ; CHECK-NEXT:    b .LBB7_2
 ; CHECK-NEXT:  .LBB7_9:
 ; CHECK-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
diff --git a/llvm/test/CodeGen/VE/Scalar/br_jt.ll b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
index 216d4cca097001c..fd44a54a7273801 100644
--- a/llvm/test/CodeGen/VE/Scalar/br_jt.ll
+++ b/llvm/test/CodeGen/VE/Scalar/br_jt.ll
@@ -21,15 +21,15 @@ define signext i32 @br_jt3(i32 signext %0) {
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB0_1:
-; CHECK-NEXT:    or %s0, 3, (0)1
-; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT:    b.l.t (, %s10)
 ; CHECK-NEXT:  .LBB0_5:
 ; CHECK-NEXT:    or %s0, 7, (0)1
 ; CHECK-NEXT:  .LBB0_6:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    or %s0, 3, (0)1
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
 ;
 ; PIC-LABEL: br_jt3:
 ; PIC:       # %bb.0:
@@ -43,14 +43,14 @@ define signext i32 @br_jt3(i32 signext %0) {
 ; PIC-NEXT:    or %s0, 0, (0)1
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
-; PIC-NEXT:  .LBB0_1:
-; PIC-NEXT:    or %s0, 3, (0)1
-; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
-; PIC-NEXT:    b.l.t (, %s10)
 ; PIC-NEXT:  .LBB0_5:
 ; PIC-NEXT:    or %s0, 7, (0)1
 ; PIC-NEXT:  .LBB0_6:
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT:    b.l.t (, %s10)
+; PIC-NEXT:  .LBB0_1:
+; PIC-NEXT:    or %s0, 3, (0)1
+; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
   switch i32 %0, label %4 [
     i32 1, label %5
@@ -308,16 +308,16 @@ define signext i32 @br_jt3_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    or %s0, 0, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB4_1:
-; CHECK-NEXT:    or %s0, 3, (0)1
-; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT:    b.l.t (, %s10)
 ; CHECK-NEXT:  .LBB4_5:
 ; CHECK-NEXT:    and %s0, %s1, (32)0
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s0
 ; CHECK-NEXT:  .LBB4_6:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
+; CHECK-NEXT:  .LBB4_1:
+; CHECK-NEXT:    or %s0, 3, (0)1
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
 ;
 ; PIC-LABEL: br_jt3_m:
 ; PIC:       # %bb.0:
@@ -331,15 +331,15 @@ define signext i32 @br_jt3_m(i32 signext %0, i32 signext %1) {
 ; PIC-NEXT:    or %s0, 0, (0)1
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
-; PIC-NEXT:  .LBB4_1:
-; PIC-NEXT:    or %s0, 3, (0)1
-; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
-; PIC-NEXT:    b.l.t (, %s10)
 ; PIC-NEXT:  .LBB4_5:
 ; PIC-NEXT:    and %s0, %s1, (32)0
 ; PIC-NEXT:    adds.w.sx %s0, 3, %s0
 ; PIC-NEXT:  .LBB4_6:
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT:    b.l.t (, %s10)
+; PIC-NEXT:  .LBB4_1:
+; PIC-NEXT:    or %s0, 3, (0)1
+; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
   switch i32 %0, label %6 [
     i32 1, label %7
@@ -534,15 +534,15 @@ define signext i32 @br_jt7_m(i32 signext %0, i32 signext %1) {
 ; PIC-NEXT:    adds.w.sx %s0, 3, %s1
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
-; PIC-NEXT:  .LBB6_2:
-; PIC-NEXT:    or %s0, 3, (0)1
-; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
-; PIC-NEXT:    b.l.t (, %s10)
 ; PIC-NEXT:  .LBB6_15:
 ; PIC-NEXT:    or %s0, 11, (0)1
 ; PIC-NEXT:  .LBB6_16:
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    b.l.t (, %s10)
+; PIC-NEXT:  .LBB6_2:
+; PIC-NEXT:    or %s0, 3, (0)1
+; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
+; PIC-NEXT:    b.l.t (, %s10)
 ; PIC-NEXT:  .LBB6_13:
 ; PIC-NEXT:    or %s0, 0, (0)1
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
@@ -615,11 +615,6 @@ define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    adds.w.sx %s0, 3, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB7_9:
-; CHECK-NEXT:    or %s0, 0, %s2
-; CHECK-NEXT:  .LBB7_10:
-; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT:    b.l.t (, %s10)
 ; CHECK-NEXT:  .LBB7_5:
 ; CHECK-NEXT:    adds.w.sx %s0, -5, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
@@ -628,12 +623,17 @@ define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
 ; CHECK-NEXT:    adds.w.sx %s0, -2, %s1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
+; CHECK-NEXT:  .LBB7_7:
+; CHECK-NEXT:    or %s0, 10, (0)1
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    b.l.t (, %s10)
 ; CHECK-NEXT:  .LBB7_8:
 ; CHECK-NEXT:    or %s0, 11, (0)1
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
-; CHECK-NEXT:  .LBB7_7:
-; CHECK-NEXT:    or %s0, 10, (0)1
+; CHECK-NEXT:  .LBB7_9:
+; CHECK-NEXT:    or %s0, 0, %s2
+; CHECK-NEXT:  .LBB7_10:
 ; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; CHECK-NEXT:    b.l.t (, %s10)
 ;
@@ -672,20 +672,20 @@ define signext i32 @br_jt8_m(i32 signext %0, i32 signext %1) {
 ; PIC-NEXT:  .LBB7_4:
 ; PIC-NEXT:    adds.w.sx %s0, 3, %s1
 ; PIC-NEXT:    br.l.t .LBB7_10
-; PIC-NEXT:  .LBB7_9:
-; PIC-NEXT:    or %s0, 0, %s2
-; PIC-NEXT:    br.l.t .LBB7_10
 ; PIC-NEXT:  .LBB7_5:
 ; PIC-NEXT:    adds.w.sx %s0, -5, %s1
 ; PIC-NEXT:    br.l.t .LBB7_10
 ; PIC-NEXT:  .LBB7_6:
 ; PIC-NEXT:    adds.w.sx %s0, -2, %s1
 ; PIC-NEXT:    br.l.t .LBB7_10
+; PIC-NEXT:  .LBB7_7:
+; PIC-NEXT:    or %s0, 10, (0)1
+; PIC-NEXT:    br.l.t .LBB7_10
 ; PIC-NEXT:  .LBB7_8:
 ; PIC-NEXT:    or %s0, 11, (0)1
 ; PIC-NEXT:    br.l.t .LBB7_10
-; PIC-NEXT:  .LBB7_7:
-; PIC-NEXT:    or %s0, 10, (0)1
+; PIC-NEXT:  .LBB7_9:
+; PIC-NEXT:    or %s0, 0, %s2
 ; PIC-NEXT:  .LBB7_10:
 ; PIC-NEXT:    adds.w.sx %s0, %s0, (0)1
 ; PIC-NEXT:    ld %s16, 32(, %s11)
diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index ca281043eddaaee..42e39c22b43cef3 100644
--- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -53,6 +53,30 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    movb $1, %r8b
 ; CHECK-NEXT:    jmp .LBB0_3
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_4: # %a33b
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movl %esi, %r9d
+; CHECK-NEXT:    orl %eax, %r9d
+; CHECK-NEXT:    jns .LBB0_20
+; CHECK-NEXT:  .LBB0_5: # %a50b
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movl %eax, %r10d
+; CHECK-NEXT:    orl %esi, %r10d
+; CHECK-NEXT:    jns .LBB0_25
+; CHECK-NEXT:  .LBB0_6: # %a57b
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    shrl $31, %r9d
+; CHECK-NEXT:    testb %r9b, %r9b
+; CHECK-NEXT:    je .LBB0_29
+; CHECK-NEXT:  .LBB0_7: # %a66b
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    shrl $31, %r10d
+; CHECK-NEXT:    testb %r10b, %r10b
+; CHECK-NEXT:    je .LBB0_33
+; CHECK-NEXT:  .LBB0_8: # %a93b
+; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    jns .LBB0_35
 ; CHECK-NEXT:  .LBB0_9: # %b1606
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    testb %dil, %dil
@@ -95,26 +119,7 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    je .LBB0_19
-; CHECK-NEXT:  .LBB0_4: # %a33b
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    movl %esi, %r9d
-; CHECK-NEXT:    orl %eax, %r9d
-; CHECK-NEXT:    jns .LBB0_20
-; CHECK-NEXT:  .LBB0_5: # %a50b
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    movl %eax, %r10d
-; CHECK-NEXT:    orl %esi, %r10d
-; CHECK-NEXT:    jns .LBB0_25
-; CHECK-NEXT:  .LBB0_6: # %a57b
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r9d
-; CHECK-NEXT:    testb %r9b, %r9b
-; CHECK-NEXT:    je .LBB0_29
-; CHECK-NEXT:  .LBB0_7: # %a66b
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    shrl $31, %r10d
-; CHECK-NEXT:    testb %r10b, %r10b
-; CHECK-NEXT:    jne .LBB0_8
+; CHECK-NEXT:    jmp .LBB0_4
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_33: # %a74b
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
@@ -125,10 +130,7 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    # in Loop: Header=BB0_33 Depth=2
 ; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    jne .LBB0_33
-; CHECK-NEXT:  .LBB0_8: # %a93b
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT:    testl %eax, %eax
-; CHECK-NEXT:    js .LBB0_9
+; CHECK-NEXT:    jmp .LBB0_8
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_35: # %a97b
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
@@ -163,6 +165,28 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    je .LBB0_37
 ; CHECK-NEXT:    jmp .LBB0_21
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_23: # %a45b
+; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    # Parent Loop BB0_22 Depth=2
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=3
+; CHECK-NEXT:    testb %dil, %dil
+; CHECK-NEXT:    je .LBB0_23
+; CHECK-NEXT:  .LBB0_24: # %b712
+; CHECK-NEXT:    # in Loop: Header=BB0_22 Depth=2
+; CHECK-NEXT:    testb %dil, %dil
+; CHECK-NEXT:    je .LBB0_5
+; CHECK-NEXT:  .LBB0_22: # %b535
+; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    # => This Loop Header: Depth=2
+; CHECK-NEXT:    # Child Loop BB0_23 Depth 3
+; CHECK-NEXT:    testq %rdx, %rdx
+; CHECK-NEXT:    jns .LBB0_23
+; CHECK-NEXT:    jmp .LBB0_24
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_26: # %b879
+; CHECK-NEXT:    # in Loop: Header=BB0_25 Depth=2
+; CHECK-NEXT:    testb %r8b, %r8b
+; CHECK-NEXT:    je .LBB0_28
 ; CHECK-NEXT:  .LBB0_27: # %b1016
 ; CHECK-NEXT:    # in Loop: Header=BB0_25 Depth=2
 ; CHECK-NEXT:    testq %rsi, %rsi
@@ -181,10 +205,7 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    testb %dil, %dil
 ; CHECK-NEXT:    je .LBB0_38
-; CHECK-NEXT:  .LBB0_26: # %b879
-; CHECK-NEXT:    # in Loop: Header=BB0_25 Depth=2
-; CHECK-NEXT:    testb %r8b, %r8b
-; CHECK-NEXT:    jne .LBB0_27
+; CHECK-NEXT:    jmp .LBB0_26
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_28: # %a53b1019
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
@@ -194,6 +215,10 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    jle .LBB0_28
 ; CHECK-NEXT:    jmp .LBB0_27
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_30: # %b1139
+; CHECK-NEXT:    # in Loop: Header=BB0_29 Depth=2
+; CHECK-NEXT:    testq %rsi, %rsi
+; CHECK-NEXT:    jg .LBB0_32
 ; CHECK-NEXT:  .LBB0_31: # %b1263
 ; CHECK-NEXT:    # in Loop: Header=BB0_29 Depth=2
 ; CHECK-NEXT:    testq %rdx, %rdx
@@ -212,10 +237,7 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    jle .LBB0_39
-; CHECK-NEXT:  .LBB0_30: # %b1139
-; CHECK-NEXT:    # in Loop: Header=BB0_29 Depth=2
-; CHECK-NEXT:    testq %rsi, %rsi
-; CHECK-NEXT:    jle .LBB0_31
+; CHECK-NEXT:    jmp .LBB0_30
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_32: # %a63b1266
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
@@ -224,25 +246,6 @@ define dso_local void @foo(ptr %a0, ptr %a1, ptr %a2, ptr %a3, ptr %a4, ptr %a5)
 ; CHECK-NEXT:    testq %rsi, %rsi
 ; CHECK-NEXT:    jle .LBB0_32
 ; CHECK-NEXT:    jmp .LBB0_31
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_24: # %b712
-; CHECK-NEXT:    # in Loop: Header=BB0_22 Depth=2
-; CHECK-NEXT:    testb %dil, %dil
-; CHECK-NEXT:    je .LBB0_5
-; CHECK-NEXT:  .LBB0_22: # %b535
-; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
-; CHECK-NEXT:    # => This Loop Header: Depth=2
-; CHECK-NEXT:    # Child Loop BB0_23 Depth 3
-; CHECK-NEXT:    testq %rdx, %rdx
-; CHECK-NEXT:    js .LBB0_24
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_23: # %a45b
-; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
-; CHECK-NEXT:    # Parent Loop BB0_22 Depth=2
-; CHECK-NEXT:    # => This Inner Loop Header: Depth=3
-; CHECK-NEXT:    testb %dil, %dil
-; CHECK-NEXT:    je .LBB0_23
-; CHECK-NEXT:    jmp .LBB0_24
 ; CHECK-NEXT:  .LBB0_10: # %a109b
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index bf939c4131080d3..332a4596eb337e0 100644
--- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -137,15 +137,6 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
 ; CHECK-NEXT:    calll __ZN12wxStringBase10ConcatSelfEmPKwm
 ; CHECK-NEXT:  Ltmp11:
 ; CHECK-NEXT:    jmp LBB0_5
-; CHECK-NEXT:  LBB0_9: ## %bb5657
-; CHECK-NEXT:  Ltmp13:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl %eax, (%esp)
-; CHECK-NEXT:    calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE
-; CHECK-NEXT:  Ltmp14:
-; CHECK-NEXT:    jmp LBB0_25
 ; CHECK-NEXT:  LBB0_20: ## %bb5968
 ; CHECK-NEXT:  Ltmp2:
 ; CHECK-NEXT:    movl $0, {{[0-9]+}}(%esp)
@@ -154,6 +145,15 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct
 ; CHECK-NEXT:    calll __ZN8wxString6FormatEPKwz
 ; CHECK-NEXT:    subl $4, %esp
 ; CHECK-NEXT:  Ltmp3:
+; CHECK-NEXT:    jmp LBB0_25
+; CHECK-NEXT:  LBB0_9: ## %bb5657
+; CHECK-NEXT:  Ltmp13:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl %eax, (%esp)
+; CHECK-NEXT:    calll __ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE
+; CHECK-NEXT:  Ltmp14:
 ; CHECK-NEXT:  LBB0_25: ## %bb115.critedge.i
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    addl $28, %esp
diff --git a/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll b/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll
index 214da14322d511e..5ad218f6cdc8934 100644
--- a/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll
+++ b/llvm/test/CodeGen/X86/2009-08-12-badswitch.ll
@@ -45,9 +45,6 @@ define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
 ; CHECK-NEXT:  LBB0_3: ## %RRETURN_6
 ; CHECK-NEXT:    callq _f2
 ; CHECK-NEXT:    jmp LBB0_28
-; CHECK-NEXT:  LBB0_2: ## %RETURN
-; CHECK-NEXT:    callq _f1
-; CHECK-NEXT:    jmp LBB0_28
 ; CHECK-NEXT:  LBB0_4: ## %RRETURN_7
 ; CHECK-NEXT:    callq _f3
 ; CHECK-NEXT:    jmp LBB0_28
@@ -119,6 +116,9 @@ define internal fastcc i32 @foo(i64 %bar) nounwind ssp {
 ; CHECK-NEXT:    jmp LBB0_28
 ; CHECK-NEXT:  LBB0_27: ## %RRETURN_1
 ; CHECK-NEXT:    callq _f26
+; CHECK-NEXT:    jmp LBB0_28
+; CHECK-NEXT:  LBB0_2: ## %RETURN
+; CHECK-NEXT:    callq _f1
 ; CHECK-NEXT:  LBB0_28: ## %EXIT
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    popq %rcx
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index a522f0e9828a054..5cdabc0a79b0d8e 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -1149,8 +1149,8 @@ define void @test_flow_unwind() personality i32 (...)* @pers {
 ; CHECK: %entry
 ; CHECK: %then
 ; CHECK: %exit
-; CHECK: %innerlp
 ; CHECK: %outerlp
+; CHECK: %innerlp
 ; CHECK: %outercleanup
 entry:
   %0 = invoke i32 @foo()
diff --git a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
index f5f033398310116..880f85cc7f17b78 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-outputs.ll
@@ -50,12 +50,12 @@ define i32 @test2(i32 %out1, i32 %out2) nounwind {
 ; CHECK-NEXT:  .LBB1_2: # Block address taken
 ; CHECK-NEXT:    # %if.then.label_true_crit_edge
 ; CHECK-NEXT:    # Label of block must be emitted
-; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:    jmp .LBB1_9
 ; CHECK-NEXT:  .LBB1_3: # %if.else
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    testl %esi, %edi
 ; CHECK-NEXT:    testl %esi, %edi
-; CHECK-NEXT:    jne .LBB1_9
+; CHECK-NEXT:    jne .LBB1_6
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:  .LBB1_4:
 ; CHECK-NEXT:    movl %esi, %eax
@@ -64,20 +64,20 @@ define i32 @test2(i32 %out1, i32 %out2) nounwind {
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
-; CHECK-NEXT:  .LBB1_7: # Block address taken
-; CHECK-NEXT:    # %if.else.label_true_crit_edge
-; CHECK-NEXT:    # Label of block must be emitted
-; CHECK-NEXT:  .LBB1_8: # %label_true
-; CHECK-NEXT:    movl $-2, %eax
-; CHECK-NEXT:    jmp .LBB1_5
-; CHECK-NEXT:  .LBB1_9: # Block address taken
+; CHECK-NEXT:  .LBB1_6: # Block address taken
 ; CHECK-NEXT:    # %if.else.return_crit_edge
 ; CHECK-NEXT:    # Label of block must be emitted
-; CHECK-NEXT:  .LBB1_6: # Block address taken
+; CHECK-NEXT:  .LBB1_7: # Block address taken
 ; CHECK-NEXT:    # %if.then.return_crit_edge
 ; CHECK-NEXT:    # Label of block must be emitted
 ; CHECK-NEXT:    movl $-1, %eax
 ; CHECK-NEXT:    jmp .LBB1_5
+; CHECK-NEXT:  .LBB1_8: # Block address taken
+; CHECK-NEXT:    # %if.else.label_true_crit_edge
+; CHECK-NEXT:    # Label of block must be emitted
+; CHECK-NEXT:  .LBB1_9: # %label_true
+; CHECK-NEXT:    movl $-2, %eax
+; CHECK-NEXT:    jmp .LBB1_5
 entry:
   %cmp = icmp slt i32 %out1, %out2
   br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll
index cdf2fb05a731374..cc14f73b1c2faa9 100644
--- a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll
+++ b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll
@@ -59,9 +59,9 @@ define void @bar() !prof !1 {
 ; Test a nested loop case when profile data is available.
 ;
 ; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq g
 ; CHECK-PROFILE: callq h
 ; CHECK-PROFILE: callq b
-; CHECK-PROFILE: callq g
 ; CHECK-PROFILE: callq e
 ; CHECK-PROFILE: callq f
 ; CHECK-PROFILE: callq c
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index adb7319fe80b114..2f0dc756b426108 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -144,17 +144,17 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    movq x0@GOTPCREL(%rip), %r15
 ; CHECK-NEXT:    movq %rsi, %r12
 ; CHECK-NEXT:    jmp .LBB1_2
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_15: # %for.cond1.for.inc3_crit_edge
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movl %edx, (%r8)
-; CHECK-NEXT:  .LBB1_16: # %for.inc3
+; CHECK-NEXT:  .LBB1_9: # %vector.body.prol.preheader
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    addq %rbx, %r12
-; CHECK-NEXT:    incq %r14
-; CHECK-NEXT:    addq %rbx, %r11
-; CHECK-NEXT:    incl %ecx
-; CHECK-NEXT:    je .LBB1_17
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT:    movdqu %xmm0, (%r12,%r13,8)
+; CHECK-NEXT:    movdqu %xmm0, 16(%r12,%r13,8)
+; CHECK-NEXT:    movl $4, %r10d
+; CHECK-NEXT:    shrq $2, %rax
+; CHECK-NEXT:    jne .LBB1_11
+; CHECK-NEXT:    jmp .LBB1_13
+; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB1_2: # %for.cond1thread-pre-split
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB1_12 Depth 2
@@ -197,18 +197,8 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    leaq -4(%rdx), %rax
 ; CHECK-NEXT:    btl $2, %eax
-; CHECK-NEXT:    jb .LBB1_8
-; CHECK-NEXT:  # %bb.9: # %vector.body.prol.preheader
-; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT:    movdqu %xmm0, (%r12,%r13,8)
-; CHECK-NEXT:    movdqu %xmm0, 16(%r12,%r13,8)
-; CHECK-NEXT:    movl $4, %r10d
-; CHECK-NEXT:    shrq $2, %rax
-; CHECK-NEXT:    jne .LBB1_11
-; CHECK-NEXT:    jmp .LBB1_13
-; CHECK-NEXT:  .LBB1_8: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    jae .LBB1_9
+; CHECK-NEXT:  # %bb.8: # in Loop: Header=BB1_2 Depth=1
 ; CHECK-NEXT:    xorl %r10d, %r10d
 ; CHECK-NEXT:    shrq $2, %rax
 ; CHECK-NEXT:    je .LBB1_13
@@ -247,8 +237,17 @@ define void @_Z2x6v() local_unnamed_addr {
 ; CHECK-NEXT:    cmpq $-1, %r13
 ; CHECK-NEXT:    movq %rdx, %r13
 ; CHECK-NEXT:    jl .LBB1_14
-; CHECK-NEXT:    jmp .LBB1_15
-; CHECK-NEXT:  .LBB1_17: # %for.cond.for.end5_crit_edge
+; CHECK-NEXT:  .LBB1_15: # %for.cond1.for.inc3_crit_edge
+; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    movl %edx, (%r8)
+; CHECK-NEXT:  .LBB1_16: # %for.inc3
+; CHECK-NEXT:    # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    addq %rbx, %r12
+; CHECK-NEXT:    incq %r14
+; CHECK-NEXT:    addq %rbx, %r11
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:    jne .LBB1_2
+; CHECK-NEXT:  # %bb.17: # %for.cond.for.end5_crit_edge
 ; CHECK-NEXT:    movq x5@GOTPCREL(%rip), %rax
 ; CHECK-NEXT:    movq %rdi, (%rax)
 ; CHECK-NEXT:    movq x3@GOTPCREL(%rip), %rax
diff --git a/llvm/test/CodeGen/X86/dup-cost.ll b/llvm/test/CodeGen/X86/dup-cost.ll
index ec9d36aa2a11b65..93af487c64ca09d 100644
--- a/llvm/test/CodeGen/X86/dup-cost.ll
+++ b/llvm/test/CodeGen/X86/dup-cost.ll
@@ -16,13 +16,13 @@ define i32 @cold(i32 %a, ptr %p, ptr %q) !prof !21 {
 ; CHECK-NEXT:  # %bb.4: # %true2
 ; CHECK-NEXT:    xorl %edi, %eax
 ; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_5: # %false2
+; CHECK-NEXT:    andl %edi, %eax
+; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_2: # %false1
 ; CHECK-NEXT:    movl (%rdx), %eax
 ; CHECK-NEXT:    addl $-3, %eax
 ; CHECK-NEXT:    jmp .LBB0_3
-; CHECK-NEXT:  .LBB0_5: # %false2
-; CHECK-NEXT:    andl %edi, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond1 = icmp sgt i32 %a, 1
   br i1 %cond1, label %true1, label %false1, !prof !30
diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
index ee23c922174d64c..4b60ce6154d1c08 100644
--- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
@@ -219,19 +219,21 @@ define void @test_multi_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-NEXT:    xorl %r8d, %r8d
 ; CHECK-NEXT:    movq (%rdi), %rdx
 ; CHECK-NEXT:    movq (%rsi), %rsi
+; CHECK-NEXT:    jmp LBB4_2
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB4_1: ## %for.check
+; CHECK-NEXT:    ## in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    incq %r8
+; CHECK-NEXT:    addq $16, %rax
+; CHECK-NEXT:    cmpq %rcx, %r8
+; CHECK-NEXT:    jge LBB4_3
 ; CHECK-NEXT:  LBB4_2: ## %for.body
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    mulxq %rsi, %r9, %rdi
 ; CHECK-NEXT:    addq %r9, (%rax)
 ; CHECK-NEXT:    adcq %rdi, 8(%rax)
-; CHECK-NEXT:  ## %bb.1: ## %for.check
-; CHECK-NEXT:    ## in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    incq %r8
-; CHECK-NEXT:    addq $16, %rax
-; CHECK-NEXT:    cmpq %rcx, %r8
-; CHECK-NEXT:    jl LBB4_2
-; CHECK-NEXT:  ## %bb.3: ## %exit
+; CHECK-NEXT:    jmp LBB4_1
+; CHECK-NEXT:  LBB4_3: ## %exit
 ; CHECK-NEXT:    retq
                             ptr dereferenceable(8) align(8) %x2,
                             ptr %y, i64 %count) nounwind nofree nosync {
@@ -267,19 +269,21 @@ define void @test_div_def(ptr dereferenceable(8) align(8) %x1,
 ; CHECK-NEXT:    xorl %r9d, %r9d
 ; CHECK-NEXT:    movl (%rdi), %edi
 ; CHECK-NEXT:    movl (%rsi), %esi
+; CHECK-NEXT:    jmp LBB5_2
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB5_1: ## %for.check
+; CHECK-NEXT:    ## in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    incq %r9
+; CHECK-NEXT:    cmpl %ecx, %r9d
+; CHECK-NEXT:    jge LBB5_3
 ; CHECK-NEXT:  LBB5_2: ## %for.body
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divl %esi
 ; CHECK-NEXT:    addl %eax, (%r8,%r9,4)
-; CHECK-NEXT:  ## %bb.1: ## %for.check
-; CHECK-NEXT:    ## in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    incq %r9
-; CHECK-NEXT:    cmpl %ecx, %r9d
-; CHECK-NEXT:    jl LBB5_2
-; CHECK-NEXT:  ## %bb.3: ## %exit
+; CHECK-NEXT:    jmp LBB5_1
+; CHECK-NEXT:  LBB5_3: ## %exit
 ; CHECK-NEXT:    retq
                           ptr dereferenceable(8) align(8) %x2,
                           ptr %y, i32 %count) nounwind nofree nosync {
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
index 3687194972f27f4..66a4a894093bcb1 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -13,7 +13,12 @@ define fastcc void @outer_loop(ptr nocapture %gfp, ptr nocapture %xr, i32 %targ_
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl $88, %eax
 ; CHECK-NEXT:    movl $168, %ecx
+; CHECK-NEXT:    jmp LBB0_2
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  LBB0_1: ## %bb28.i37.loopexit
+; CHECK-NEXT:    ## in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    addl $4, %eax
+; CHECK-NEXT:    addl $168, %ecx
 ; CHECK-NEXT:  LBB0_2: ## %bb28.i37
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_3 Depth 2
@@ -27,11 +32,7 @@ define fastcc void @outer_loop(ptr nocapture %gfp, ptr nocapture %xr, i32 %targ_
 ; CHECK-NEXT:    addl $12, %esi
 ; CHECK-NEXT:    cmpl $11, %edx
 ; CHECK-NEXT:    jbe LBB0_3
-; CHECK-NEXT:  ## %bb.1: ## %bb28.i37.loopexit
-; CHECK-NEXT:    ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    addl $4, %eax
-; CHECK-NEXT:    addl $168, %ecx
-; CHECK-NEXT:    jmp LBB0_2
+; CHECK-NEXT:    jmp LBB0_1
 entry:
 	br label %bb4
 
diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll
index beb2dba05e85ac3..9b74ef08a8f5482 100644
--- a/llvm/test/CodeGen/X86/mul-constant-result.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-result.ll
@@ -28,7 +28,7 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_4:
 ; X86-NEXT:    decl %ecx
 ; X86-NEXT:    cmpl $31, %ecx
-; X86-NEXT:    ja .LBB0_7
+; X86-NEXT:    ja .LBB0_12
 ; X86-NEXT:  # %bb.5:
 ; X86-NEXT:    jmpl *.LJTI0_0(,%ecx,4)
 ; X86-NEXT:  .LBB0_6:
@@ -36,27 +36,27 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_7:
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:  .LBB0_8:
-; X86-NEXT:    popl %esi
-; X86-NEXT:    .cfi_def_cfa_offset 4
-; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_10:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $2, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
-; X86-NEXT:  .LBB0_12:
+; X86-NEXT:  .LBB0_10:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    jmp .LBB0_9
-; X86-NEXT:  .LBB0_13:
+; X86-NEXT:    jmp .LBB0_7
+; X86-NEXT:  .LBB0_11:
 ; X86-NEXT:    leal (,%eax,8), %ecx
 ; X86-NEXT:    jmp .LBB0_42
+; X86-NEXT:  .LBB0_12:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:  .LBB0_13:
+; X86-NEXT:    popl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 4
+; X86-NEXT:    retl
 ; X86-NEXT:  .LBB0_14:
+; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $3, %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
@@ -64,13 +64,13 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_16:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    jmp .LBB0_11
+; X86-NEXT:    jmp .LBB0_9
 ; X86-NEXT:  .LBB0_17:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:    jmp .LBB0_18
 ; X86-NEXT:  .LBB0_19:
 ; X86-NEXT:    shll $2, %eax
-; X86-NEXT:    jmp .LBB0_9
+; X86-NEXT:    jmp .LBB0_7
 ; X86-NEXT:  .LBB0_20:
 ; X86-NEXT:    leal (%eax,%eax,2), %ecx
 ; X86-NEXT:    jmp .LBB0_21
@@ -80,7 +80,7 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:    jmp .LBB0_23
 ; X86-NEXT:  .LBB0_24:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
-; X86-NEXT:    jmp .LBB0_9
+; X86-NEXT:    jmp .LBB0_7
 ; X86-NEXT:  .LBB0_25:
 ; X86-NEXT:    shll $4, %eax
 ; X86-NEXT:    popl %esi
@@ -109,7 +109,7 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:  .LBB0_30:
 ; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    shll $2, %eax
-; X86-NEXT:    jmp .LBB0_11
+; X86-NEXT:    jmp .LBB0_9
 ; X86-NEXT:  .LBB0_31:
 ; X86-NEXT:    leal (%eax,%eax,4), %ecx
 ; X86-NEXT:  .LBB0_21:
@@ -128,10 +128,10 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:    jmp .LBB0_42
 ; X86-NEXT:  .LBB0_34:
 ; X86-NEXT:    shll $3, %eax
-; X86-NEXT:    jmp .LBB0_9
+; X86-NEXT:    jmp .LBB0_7
 ; X86-NEXT:  .LBB0_35:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
-; X86-NEXT:  .LBB0_11:
+; X86-NEXT:  .LBB0_9:
 ; X86-NEXT:    leal (%eax,%eax,4), %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
@@ -143,7 +143,7 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT:    jmp .LBB0_27
 ; X86-NEXT:  .LBB0_37:
 ; X86-NEXT:    leal (%eax,%eax,8), %eax
-; X86-NEXT:  .LBB0_9:
+; X86-NEXT:  .LBB0_7:
 ; X86-NEXT:    leal (%eax,%eax,2), %eax
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
@@ -199,41 +199,54 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:    cmovel %ecx, %eax
 ; X64-HSW-NEXT:    decl %edi
 ; X64-HSW-NEXT:    cmpl $31, %edi
-; X64-HSW-NEXT:    ja .LBB0_3
+; X64-HSW-NEXT:    ja .LBB0_8
 ; X64-HSW-NEXT:  # %bb.1:
 ; X64-HSW-NEXT:    jmpq *.LJTI0_0(,%rdi,8)
 ; X64-HSW-NEXT:  .LBB0_2:
 ; X64-HSW-NEXT:    addl %eax, %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_3:
-; X64-HSW-NEXT:    xorl %eax, %eax
 ; X64-HSW-NEXT:  .LBB0_4:
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_6:
 ; X64-HSW-NEXT:    shll $2, %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_8:
+; X64-HSW-NEXT:  .LBB0_6:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:  .LBB0_5:
+; X64-HSW-NEXT:  .LBB0_3:
 ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_33:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_34
+; X64-HSW-NEXT:  .LBB0_8:
+; X64-HSW-NEXT:    xorl %eax, %eax
 ; X64-HSW-NEXT:  .LBB0_9:
-; X64-HSW-NEXT:    leal (,%rax,8), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_38
+; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-HSW-NEXT:    retq
 ; X64-HSW-NEXT:  .LBB0_10:
 ; X64-HSW-NEXT:    shll $3, %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
 ; X64-HSW-NEXT:  .LBB0_12:
 ; X64-HSW-NEXT:    addl %eax, %eax
-; X64-HSW-NEXT:  .LBB0_7:
+; X64-HSW-NEXT:  .LBB0_5:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_31:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT:    leal (%rcx,%rcx,4), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_34
+; X64-HSW-NEXT:  .LBB0_32:
+; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_7:
+; X64-HSW-NEXT:    leal (,%rax,8), %ecx
+; X64-HSW-NEXT:    jmp .LBB0_38
 ; X64-HSW-NEXT:  .LBB0_13:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,2), %eax
@@ -292,33 +305,6 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:  .LBB0_27:
 ; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
 ; X64-HSW-NEXT:    leal (%rax,%rcx,4), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_34
-; X64-HSW-NEXT:  .LBB0_28:
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
-; X64-HSW-NEXT:    shll $3, %ecx
-; X64-HSW-NEXT:    jmp .LBB0_38
-; X64-HSW-NEXT:  .LBB0_29:
-; X64-HSW-NEXT:    shll $3, %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_30:
-; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_31:
-; X64-HSW-NEXT:    leal (%rax,%rax,4), %ecx
-; X64-HSW-NEXT:    leal (%rcx,%rcx,4), %ecx
-; X64-HSW-NEXT:    jmp .LBB0_34
-; X64-HSW-NEXT:  .LBB0_32:
-; X64-HSW-NEXT:    leal (%rax,%rax,8), %eax
-; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
-; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
-; X64-HSW-NEXT:    retq
-; X64-HSW-NEXT:  .LBB0_33:
-; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
-; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
 ; X64-HSW-NEXT:  .LBB0_34:
 ; X64-HSW-NEXT:    addl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
@@ -340,6 +326,10 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:  .LBB0_37:
 ; X64-HSW-NEXT:    movl %eax, %ecx
 ; X64-HSW-NEXT:    shll $5, %ecx
+; X64-HSW-NEXT:    jmp .LBB0_38
+; X64-HSW-NEXT:  .LBB0_28:
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT:    shll $3, %ecx
 ; X64-HSW-NEXT:  .LBB0_38:
 ; X64-HSW-NEXT:    subl %eax, %ecx
 ; X64-HSW-NEXT:    movl %ecx, %eax
@@ -348,6 +338,16 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT:  .LBB0_40:
 ; X64-HSW-NEXT:    shll $5, %eax
 ; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_29:
+; X64-HSW-NEXT:    shll $3, %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
+; X64-HSW-NEXT:    retq
+; X64-HSW-NEXT:  .LBB0_30:
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-HSW-NEXT:    retq
   %3 = icmp eq i32 %1, 0
   %4 = icmp sgt i32 %1, 1
diff --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll
index 7c4db752b4e0425..408ea9ad71bd9e6 100644
--- a/llvm/test/CodeGen/X86/pic.ll
+++ b/llvm/test/CodeGen/X86/pic.ll
@@ -231,19 +231,19 @@ bb12:
 ; CHECK-I686:	.long	 .LBB7_5@GOTOFF
 ; CHECK-I686:	.long	 .LBB7_8@GOTOFF
 ; CHECK-I686:	.long	 .LBB7_7@GOTOFF
+; CHECK-X32:	.long	.LBB7_2-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_2-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_4-.LJTI7_0
 ; CHECK-X32:	.long	.LBB7_3-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_4-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_6-.LJTI7_0
 ; CHECK-X32:	.long	.LBB7_3-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_12-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_8-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_12-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_10-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_8-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_9-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_10-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_9-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_12-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_14-.LJTI7_0
-; CHECK-X32:	.long	.LBB7_14-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_5-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_6-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_5-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_4-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_7-.LJTI7_0
+; CHECK-X32:	.long	.LBB7_7-.LJTI7_0
 }
 
 declare void @foo1(...)
diff --git a/llvm/test/CodeGen/X86/postalloc-coalescing.ll b/llvm/test/CodeGen/X86/postalloc-coalescing.ll
index 68266fc4e158a3c..aba3150447d4adc 100644
--- a/llvm/test/CodeGen/X86/postalloc-coalescing.ll
+++ b/llvm/test/CodeGen/X86/postalloc-coalescing.ll
@@ -5,15 +5,16 @@ define fastcc i32 @_Z18yy_get_next_bufferv() nounwind {
 ; CHECK-LABEL: _Z18yy_get_next_bufferv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl $42, %eax
-; CHECK-NEXT:    cmpl $-1, %eax
-; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:    jmp .LBB0_2
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %bb116
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    movb %al, 0
+; CHECK-NEXT:  .LBB0_2: # %bb131
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    cmpl $-1, %eax
 ; CHECK-NEXT:    jne .LBB0_1
-; CHECK-NEXT:  .LBB0_3: # %bb158
+; CHECK-NEXT:  # %bb.3: # %bb158
 ; CHECK-NEXT:    movb %al, 0
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/pr38743.ll b/llvm/test/CodeGen/X86/pr38743.ll
index c05310090660dd0..ef1c87118c8118f 100644
--- a/llvm/test/CodeGen/X86/pr38743.ll
+++ b/llvm/test/CodeGen/X86/pr38743.ll
@@ -27,15 +27,15 @@ define void @pr38743(i32 %a0) #1 align 2 {
 ; CHECK-NEXT:    movw %ax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq .str.17(%rip), %rax
 ; CHECK-NEXT:    jmp .LBB0_4
-; CHECK-NEXT:  .LBB0_1: # %bb2
-; CHECK-NEXT:    movq .str.16+7(%rip), %rax
-; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq .str.16(%rip), %rax
-; CHECK-NEXT:    jmp .LBB0_4
 ; CHECK-NEXT:  .LBB0_3: # %bb8
 ; CHECK-NEXT:    movq .str.18+6(%rip), %rax
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq .str.18(%rip), %rax
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_1: # %bb2
+; CHECK-NEXT:    movq .str.16+7(%rip), %rax
+; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq .str.16(%rip), %rax
 ; CHECK-NEXT:  .LBB0_4: # %bb12
 ; CHECK-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index 8e0532e60652800..cb7213325ee9107 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -30,7 +30,12 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    # implicit-def: $ebp
 ; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_14: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:  .LBB0_13: # %if.end26
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    testb %dl, %dl
+; CHECK-NEXT:    jne .LBB0_15
+; CHECK-NEXT:  # %bb.14: # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movb %dl, %ch
 ; CHECK-NEXT:    movl %ecx, %edx
 ; CHECK-NEXT:  .LBB0_1: # %for.cond
@@ -65,8 +70,22 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    # implicit-def: $eax
 ; CHECK-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
 ; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    je .LBB0_6
-; CHECK-NEXT:    jmp .LBB0_18
+; CHECK-NEXT:    jne .LBB0_18
+; CHECK-NEXT:    jmp .LBB0_6
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_15: # %if.end26
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    testl %ebp, %ebp
+; CHECK-NEXT:    jne .LBB0_16
+; CHECK-NEXT:  # %bb.17: # %if.then31
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    movb %dl, %ch
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:  .LBB0_18: # %for.inc
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_3: # %if.then
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
@@ -106,18 +125,13 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    # implicit-def: $ebp
 ; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    je .LBB0_13
+; CHECK-NEXT:    jmp .LBB0_10
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_16: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movb %dl, %ch
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_10: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    # implicit-def: $eax
-; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    je .LBB0_19
-; CHECK-NEXT:  .LBB0_12: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    # implicit-def: $edi
-; CHECK-NEXT:    # implicit-def: $ch
-; CHECK-NEXT:    # implicit-def: $dl
-; CHECK-NEXT:    # implicit-def: $ebp
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jne .LBB0_11
 ; CHECK-NEXT:  .LBB0_7: # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    movb %dl, %cl
@@ -134,30 +148,19 @@ define dso_local void @fn() {
 ; CHECK-NEXT:  # %bb.24: # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    movb %ch, %dl
 ; CHECK-NEXT:    testb %bl, %bl
-; CHECK-NEXT:    jne .LBB0_10
-; CHECK-NEXT:  .LBB0_13: # %if.end26
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    je .LBB0_14
-; CHECK-NEXT:  # %bb.15: # %if.end26
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    testl %ebp, %ebp
-; CHECK-NEXT:    jne .LBB0_16
-; CHECK-NEXT:  # %bb.17: # %if.then31
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    movb %dl, %ch
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:  .LBB0_18: # %for.inc
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    jmp .LBB0_1
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB0_16: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    movb %dl, %ch
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:    je .LBB0_13
+; CHECK-NEXT:  .LBB0_10: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    # implicit-def: $eax
+; CHECK-NEXT:    testb %bl, %bl
+; CHECK-NEXT:    je .LBB0_19
+; CHECK-NEXT:  .LBB0_12: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    # implicit-def: $edi
+; CHECK-NEXT:    # implicit-def: $ch
+; CHECK-NEXT:    # implicit-def: $dl
+; CHECK-NEXT:    # implicit-def: $ebp
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jne .LBB0_11
+; CHECK-NEXT:    jmp .LBB0_7
 entry:
   br label %for.cond
 
diff --git a/llvm/test/CodeGen/X86/pr63692.ll b/llvm/test/CodeGen/X86/pr63692.ll
index fb3198743a3949e..e42540cb9bc158b 100644
--- a/llvm/test/CodeGen/X86/pr63692.ll
+++ b/llvm/test/CodeGen/X86/pr63692.ll
@@ -4,16 +4,17 @@
 define void @prefault(ptr noundef %range_start, ptr noundef readnone %range_end) {
 ; CHECK-LABEL: prefault:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cmpq %rsi, %rdi
-; CHECK-NEXT:    jae .LBB0_3
+; CHECK-NEXT:    jmp .LBB0_2
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %while.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    lock orb $0, (%rdi)
 ; CHECK-NEXT:    addq $4096, %rdi # imm = 0x1000
+; CHECK-NEXT:  .LBB0_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    cmpq %rsi, %rdi
 ; CHECK-NEXT:    jb .LBB0_1
-; CHECK-NEXT:  .LBB0_3: # %while.end
+; CHECK-NEXT:  # %bb.3: # %while.end
 ; CHECK-NEXT:    retq
 entry:
   %cmp3 = icmp ult ptr %range_start, %range_end
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index e9448a800fd9597..c50a39c99fb6c65 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -190,14 +190,14 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:    movl $268, %ebp ## imm = 0x10C
 ; CHECK-NEXT:    jmp LBB0_20
-; CHECK-NEXT:  LBB0_39: ## %sw.bb566
-; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT:    movl $20, %ebp
-; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_19: ## %sw.bb243
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    movl $2, %ebp
 ; CHECK-NEXT:    jmp LBB0_20
+; CHECK-NEXT:  LBB0_39: ## %sw.bb566
+; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT:    movl $20, %ebp
+; CHECK-NEXT:    jmp LBB0_20
 ; CHECK-NEXT:  LBB0_32: ## %if.end517.loopexitsplit
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=1
 ; CHECK-NEXT:    incq %rbx
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
index 4d0599022d53847..7c8618af83d3d7b 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
@@ -534,12 +534,6 @@ define dso_local i32 @test_switch_jumptable(i32 %idx) nounwind {
 ; X64-NEXT:    movl $7, %eax
 ; X64-NEXT:    orq %rcx, %rsp
 ; X64-NEXT:    retq
-; X64-NEXT:  .LBB6_2: # %bb0
-; X64-NEXT:    cmovbeq %rax, %rcx
-; X64-NEXT:    shlq $47, %rcx
-; X64-NEXT:    movl $2, %eax
-; X64-NEXT:    orq %rcx, %rsp
-; X64-NEXT:    retq
 ; X64-NEXT:  .LBB6_4: # Block address taken
 ; X64-NEXT:    # %bb2
 ; X64-NEXT:    cmpq $.LBB6_4, %rdx
@@ -564,6 +558,12 @@ define dso_local i32 @test_switch_jumptable(i32 %idx) nounwind {
 ; X64-NEXT:    movl $11, %eax
 ; X64-NEXT:    orq %rcx, %rsp
 ; X64-NEXT:    retq
+; X64-NEXT:  .LBB6_2: # %bb0
+; X64-NEXT:    cmovbeq %rax, %rcx
+; X64-NEXT:    shlq $47, %rcx
+; X64-NEXT:    movl $2, %eax
+; X64-NEXT:    orq %rcx, %rsp
+; X64-NEXT:    retq
 ;
 ; X64-PIC-LABEL: test_switch_jumptable:
 ; X64-PIC:       # %bb.0: # %entry
@@ -589,12 +589,6 @@ define dso_local i32 @test_switch_jumptable(i32 %idx) nounwind {
 ; X64-PIC-NEXT:    movl $7, %eax
 ; X64-PIC-NEXT:    orq %rcx, %rsp
 ; X64-PIC-NEXT:    retq
-; X64-PIC-NEXT:  .LBB6_2: # %bb0
-; X64-PIC-NEXT:    cmovbeq %rax, %rcx
-; X64-PIC-NEXT:    shlq $47, %rcx
-; X64-PIC-NEXT:    movl $2, %eax
-; X64-PIC-NEXT:    orq %rcx, %rsp
-; X64-PIC-NEXT:    retq
 ; X64-PIC-NEXT:  .LBB6_4: # Block address taken
 ; X64-PIC-NEXT:    # %bb2
 ; X64-PIC-NEXT:    leaq .LBB6_4(%rip), %rsi
@@ -622,6 +616,12 @@ define dso_local i32 @test_switch_jumptable(i32 %idx) nounwind {
 ; X64-PIC-NEXT:    movl $11, %eax
 ; X64-PIC-NEXT:    orq %rcx, %rsp
 ; X64-PIC-NEXT:    retq
+; X64-PIC-NEXT:  .LBB6_2: # %bb0
+; X64-PIC-NEXT:    cmovbeq %rax, %rcx
+; X64-PIC-NEXT:    shlq $47, %rcx
+; X64-PIC-NEXT:    movl $2, %eax
+; X64-PIC-NEXT:    orq %rcx, %rsp
+; X64-PIC-NEXT:    retq
 ;
 ; X64-RETPOLINE-LABEL: test_switch_jumptable:
 ; X64-RETPOLINE:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
index 0c47fcddc43af2a..abd7e0d28fc25ab 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll
@@ -433,22 +433,7 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2
 ; X64-LFENCE-NEXT:    xorl %r12d, %r12d
 ; X64-LFENCE-NEXT:    jmp .LBB3_2
 ; X64-LFENCE-NEXT:    .p2align 4, 0x90
-; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
-; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
-; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    movslq (%r14), %rax
-; X64-LFENCE-NEXT:    movl (%rbx,%rax,4), %edi
-; X64-LFENCE-NEXT:    callq sink@PLT
-; X64-LFENCE-NEXT:    incl %r12d
-; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
-; X64-LFENCE-NEXT:    jge .LBB3_6
-; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
-; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
-; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
-; X64-LFENCE-NEXT:    lfence
-; X64-LFENCE-NEXT:    testl %r15d, %r15d
-; X64-LFENCE-NEXT:    jle .LBB3_5
-; X64-LFENCE-NEXT:  # %bb.3: # %l2.header.preheader
+; X64-LFENCE-NEXT:  .LBB3_3: # %l2.header.preheader
 ; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
 ; X64-LFENCE-NEXT:    lfence
 ; X64-LFENCE-NEXT:    xorl %r13d, %r13d
@@ -463,6 +448,21 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2
 ; X64-LFENCE-NEXT:    incl %r13d
 ; X64-LFENCE-NEXT:    cmpl %ebp, %r13d
 ; X64-LFENCE-NEXT:    jl .LBB3_4
+; X64-LFENCE-NEXT:  .LBB3_5: # %l1.latch
+; X64-LFENCE-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    movslq (%r14), %rax
+; X64-LFENCE-NEXT:    movl (%rbx,%rax,4), %edi
+; X64-LFENCE-NEXT:    callq sink@PLT
+; X64-LFENCE-NEXT:    incl %r12d
+; X64-LFENCE-NEXT:    cmpl %r15d, %r12d
+; X64-LFENCE-NEXT:    jge .LBB3_6
+; X64-LFENCE-NEXT:  .LBB3_2: # %l1.header
+; X64-LFENCE-NEXT:    # =>This Loop Header: Depth=1
+; X64-LFENCE-NEXT:    # Child Loop BB3_4 Depth 2
+; X64-LFENCE-NEXT:    lfence
+; X64-LFENCE-NEXT:    testl %r15d, %r15d
+; X64-LFENCE-NEXT:    jg .LBB3_3
 ; X64-LFENCE-NEXT:    jmp .LBB3_5
 entry:
   %a.cmp = icmp eq i32 %a, 0
diff --git a/llvm/test/CodeGen/X86/switch.ll b/llvm/test/CodeGen/X86/switch.ll
index f5040f2b2bab557..264c5bf15aa28bb 100644
--- a/llvm/test/CodeGen/X86/switch.ll
+++ b/llvm/test/CodeGen/X86/switch.ll
@@ -284,8 +284,6 @@ define void @jt_is_better(i32 %x) {
 ; CHECK-NEXT:  .LBB4_3: # %bb1
 ; CHECK-NEXT:    movl $1, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
-; CHECK-NEXT:  .LBB4_7: # %return
-; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB4_4: # %bb2
 ; CHECK-NEXT:    movl $2, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
@@ -295,6 +293,8 @@ define void @jt_is_better(i32 %x) {
 ; CHECK-NEXT:  .LBB4_6: # %bb4
 ; CHECK-NEXT:    movl $4, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB4_7: # %return
+; CHECK-NEXT:    retq
 ;
 ; NOOPT-LABEL: jt_is_better:
 ; NOOPT:       # %bb.0: # %entry
@@ -1880,13 +1880,6 @@ define void @left_leaning_weight_balanced_tree(i32 %x) {
 ; CHECK-NEXT:  # %bb.7: # %bb2
 ; CHECK-NEXT:    movl $2, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
-; CHECK-NEXT:  .LBB19_12: # %entry
-; CHECK-NEXT:    cmpl $50, %edi
-; CHECK-NEXT:    je .LBB19_17
-; CHECK-NEXT:  # %bb.13: # %entry
-; CHECK-NEXT:    cmpl $60, %edi
-; CHECK-NEXT:    je .LBB19_14
-; CHECK-NEXT:    jmp .LBB19_18
 ; CHECK-NEXT:  .LBB19_8: # %entry
 ; CHECK-NEXT:    cmpl $30, %edi
 ; CHECK-NEXT:    je .LBB19_16
@@ -1896,6 +1889,14 @@ define void @left_leaning_weight_balanced_tree(i32 %x) {
 ; CHECK-NEXT:  # %bb.10: # %bb4
 ; CHECK-NEXT:    movl $4, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
+; CHECK-NEXT:  .LBB19_12: # %entry
+; CHECK-NEXT:    cmpl $50, %edi
+; CHECK-NEXT:    je .LBB19_17
+; CHECK-NEXT:  # %bb.13: # %entry
+; CHECK-NEXT:    cmpl $60, %edi
+; CHECK-NEXT:    je .LBB19_14
+; CHECK-NEXT:  .LBB19_18: # %return
+; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB19_15: # %bb1
 ; CHECK-NEXT:    movl $1, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
@@ -1905,8 +1906,6 @@ define void @left_leaning_weight_balanced_tree(i32 %x) {
 ; CHECK-NEXT:  .LBB19_17: # %bb5
 ; CHECK-NEXT:    movl $5, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
-; CHECK-NEXT:  .LBB19_18: # %return
-; CHECK-NEXT:    retq
 ;
 ; NOOPT-LABEL: left_leaning_weight_balanced_tree:
 ; NOOPT:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 937cc173e7faefd..1d728b62b3796b6 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -7,27 +7,25 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    jmp .LBB0_1
 ; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_2: # %inner_loop_top
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    cmpb $0, (%rsi)
+; CHECK-NEXT:    js .LBB0_3
+; CHECK-NEXT:  # %bb.4: # %inner_loop_latch
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=2
+; CHECK-NEXT:    addq $2, %rsi
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_3: # %inner_loop_exit
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    incq %rsi
 ; CHECK-NEXT:  .LBB0_1: # %outer_loop_top
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
+; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    je .LBB0_5
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  # %bb.2: # %inner_loop_top
-; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    cmpb $0, (%rsi)
-; CHECK-NEXT:    js .LBB0_3
-; CHECK-NEXT:  .LBB0_4: # %inner_loop_latch
-; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
-; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    addq $2, %rsi
-; CHECK-NEXT:    cmpb $0, (%rsi)
-; CHECK-NEXT:    jns .LBB0_4
-; CHECK-NEXT:    jmp .LBB0_3
-; CHECK-NEXT:  .LBB0_5: # %exit
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  # %bb.5: # %exit
 ; CHECK-NEXT:    retq
 entry:
   %notlhs674.i = icmp eq i32 %a, 0
@@ -130,48 +128,50 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:  # %bb.6: # %shared_preheader
 ; CHECK-NEXT:    movb $32, %cl
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:    jmp .LBB1_10
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_7: # %merge_predecessor_split
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_7: # %if.end287.i
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    # implicit-def: $cl
+; CHECK-NEXT:    jne .LBB1_10
+; CHECK-NEXT:  # %bb.8: # %if.end308.i
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB1_14
+; CHECK-NEXT:  # %bb.9: # %merge_predecessor_split
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
 ; CHECK-NEXT:    movb $32, %cl
-; CHECK-NEXT:  .LBB1_8: # %outer_loop_header
+; CHECK-NEXT:  .LBB1_10: # %outer_loop_header
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
-; CHECK-NEXT:    # Child Loop BB1_9 Depth 2
+; CHECK-NEXT:    # Child Loop BB1_11 Depth 2
 ; CHECK-NEXT:    testl %r13d, %r13d
 ; CHECK-NEXT:    je .LBB1_16
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB1_9: # %shared_loop_header
-; CHECK-NEXT:    # Parent Loop BB1_8 Depth=1
+; CHECK-NEXT:  .LBB1_11: # %shared_loop_header
+; CHECK-NEXT:    # Parent Loop BB1_10 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    testq %r14, %r14
 ; CHECK-NEXT:    jne .LBB1_25
-; CHECK-NEXT:  # %bb.10: # %inner_loop_body
-; CHECK-NEXT:    # in Loop: Header=BB1_9 Depth=2
+; CHECK-NEXT:  # %bb.12: # %inner_loop_body
+; CHECK-NEXT:    # in Loop: Header=BB1_11 Depth=2
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_9
-; CHECK-NEXT:  # %bb.11: # %if.end96.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    je .LBB1_11
+; CHECK-NEXT:  # %bb.13: # %if.end96.i
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
 ; CHECK-NEXT:    cmpl $3, %r13d
-; CHECK-NEXT:    jae .LBB1_20
-; CHECK-NEXT:  # %bb.12: # %if.end287.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jne .LBB1_8
-; CHECK-NEXT:  # %bb.13: # %if.end308.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    je .LBB1_7
-; CHECK-NEXT:  # %bb.14: # %if.end335.i
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    jb .LBB1_7
+; CHECK-NEXT:    jmp .LBB1_20
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB1_14: # %if.end335.i
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    testb %cl, %cl
-; CHECK-NEXT:    jne .LBB1_8
+; CHECK-NEXT:    jne .LBB1_10
 ; CHECK-NEXT:  # %bb.15: # %merge_other
-; CHECK-NEXT:    # in Loop: Header=BB1_8 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB1_10 Depth=1
 ; CHECK-NEXT:    # implicit-def: $cl
-; CHECK-NEXT:    jmp .LBB1_8
+; CHECK-NEXT:    jmp .LBB1_10
 ; CHECK-NEXT:  .LBB1_23:
 ; CHECK-NEXT:    movl $1, %ebx
 ; CHECK-NEXT:    jmp .LBB1_24
diff --git a/llvm/test/CodeGen/X86/win-catchpad.ll b/llvm/test/CodeGen/X86/win-catchpad.ll
index 59612bfe9a535ea..d2067dd4e51c24a 100644
--- a/llvm/test/CodeGen/X86/win-catchpad.ll
+++ b/llvm/test/CodeGen/X86/win-catchpad.ll
@@ -64,13 +64,13 @@ try.cont:
 ; X86: retl
 
 ; FIXME: These should be de-duplicated.
-; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken
-; X86-NEXT:                        # %handler2
+; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken
+; X86-NEXT:                        # %handler1
 ; X86-NEXT: addl $12, %ebp
 ; X86: jmp [[contbb]]
 
-; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken
-; X86-NEXT:                        # %handler1
+; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken
+; X86-NEXT:                        # %handler2
 ; X86-NEXT: addl $12, %ebp
 ; X86: jmp [[contbb]]
 
diff --git a/llvm/unittests/Support/BlockFrequencyTest.cpp b/llvm/unittests/Support/BlockFrequencyTest.cpp
index cf5cf9213501c47..d0e2c021e097597 100644
--- a/llvm/unittests/Support/BlockFrequencyTest.cpp
+++ b/llvm/unittests/Support/BlockFrequencyTest.cpp
@@ -124,4 +124,34 @@ TEST(BlockFrequencyTest, SaturatingRightShift) {
   EXPECT_EQ(Freq.getFrequency(), 0x1ULL);
 }
 
+TEST(BlockFrequencyTest, AlmostEqual) {
+  EXPECT_FALSE(BlockFrequency(0x1234).almostEqual(0, 20));
+  EXPECT_FALSE(BlockFrequency(0x1234).almostEqual(0x1233, 20));
+  EXPECT_TRUE(BlockFrequency(0x1234).almostEqual(0x1234, 20));
+
+  EXPECT_FALSE(BlockFrequency(0x1235).almostEqual(0x1234, 20));
+
+  EXPECT_TRUE(BlockFrequency(0x4129).almostEqual(0x4128, 1));
+  EXPECT_TRUE(BlockFrequency(0x4129).almostEqual(0x4128, 2));
+  EXPECT_TRUE(BlockFrequency(0x4129).almostEqual(0x4128, 8));
+  EXPECT_TRUE(BlockFrequency(0x4129).almostEqual(0x4128, 14));
+  EXPECT_FALSE(BlockFrequency(0x4129).almostEqual(0x4128, 15));
+  EXPECT_FALSE(BlockFrequency(0x4129).almostEqual(0x4128, 16));
+  EXPECT_FALSE(BlockFrequency(0x4129).almostEqual(0x4128, 63));
+
+  EXPECT_FALSE(BlockFrequency(0x10000000000).almostEqual(0x10, 5));
+
+  uint64_t Max = std::numeric_limits<uint64_t>::max();
+  EXPECT_TRUE(BlockFrequency(Max).almostEqual(Max, 0));
+  EXPECT_TRUE(BlockFrequency(Max).almostEqual(Max, 1));
+  EXPECT_TRUE(BlockFrequency(Max).almostEqual(Max, 63));
+
+  EXPECT_TRUE(BlockFrequency(Max).almostEqual(0, 0));
+  EXPECT_FALSE(BlockFrequency(Max).almostEqual(0, 1));
+  EXPECT_FALSE(BlockFrequency(Max).almostEqual(0, 63));
+
+  EXPECT_TRUE(BlockFrequency(0).almostEqual(0, 0));
+  EXPECT_TRUE(BlockFrequency(0).almostEqual(0, 1));
+  EXPECT_TRUE(BlockFrequency(0).almostEqual(0, 63));
+}
 }



More information about the llvm-commits mailing list