[llvm] [CodeGen] Perform early program return instead of using indirect branch (PR #102127)

Vikash Gupta via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 05:15:48 PDT 2024


https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/102127

>From 5b582e36892433c4c471e2b0ee37f1c7329f6ca5 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 1 Aug 2024 16:51:01 +0530
Subject: [PATCH 1/3] [CodeGen] Perform early program return instead of using
 indirect branch

If a long/indirect branch is to a return block, it might be better to
use a short jump over to an early return block rather than expanding
the out of range branch later in branch relaxation. For example :

L0  : condBranch L1
L2  : .....
    : .....

... large L2 block ....
    : .....
L1  : .....
    : return

If L2 is large enough, [condBranch L1] would need branch relaxation,
which can be alternatively handled relatively cheap such that it
eliminates the need of relaxation by performing early program return
before it as below :

L0  : condReverseBranch L2
L1" : .....
    : return
L2  : .....
    : .....

... large L2 block ....
    : .....
L1  : .....
    : return

The above example is one of teh case of analyzable branch in L0 block,
and current implementation covers only such branches to return block.
---
 llvm/include/llvm/CodeGen/Passes.h   |   4 +
 llvm/include/llvm/InitializePasses.h |   1 +
 llvm/lib/CodeGen/CMakeLists.txt      |   1 +
 llvm/lib/CodeGen/CodeGen.cpp         |   1 +
 llvm/lib/CodeGen/EarlyReturnPass.cpp | 444 +++++++++++++++++++++++++++
 5 files changed, 451 insertions(+)
 create mode 100644 llvm/lib/CodeGen/EarlyReturnPass.cpp

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index cafb9781698a2..9b034eb130150 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -279,6 +279,10 @@ namespace llvm {
   /// predicating if/else block and insert select at the join point.
   extern char &EarlyIfPredicatorID;
 
+  /// EarlyReturnPass - This pass replaces out of range branch to return
+  /// blocks with early return block well within the range.
+  extern char &EarlyReturnPassID;
+
   /// This pass performs instruction combining using trace metrics to estimate
   /// critical-path and resource depth.
   extern char &MachineCombinerID;
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 13be9c11f0107..d6eafb7422978 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -98,6 +98,7 @@ void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&);
 void initializeEarlyIfConverterPass(PassRegistry&);
 void initializeEarlyIfPredicatorPass(PassRegistry &);
 void initializeEarlyMachineLICMPass(PassRegistry&);
+void initializeEarlyReturnPassPass(PassRegistry &);
 void initializeEarlyTailDuplicatePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEHContGuardCatchretPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f1607f85c5b31..504e18abebbe8 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_component_library(LLVMCodeGen
   DFAPacketizer.cpp
   DwarfEHPrepare.cpp
   EarlyIfConversion.cpp
+  EarlyReturnPass.cpp
   EdgeBundles.cpp
   EHContGuardCatchret.cpp
   ExecutionDomainFix.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 31fa4c105cef8..f5725e38e8f55 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -38,6 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeEarlyIfConverterPass(Registry);
   initializeEarlyIfPredicatorPass(Registry);
   initializeEarlyMachineLICMPass(Registry);
+  initializeEarlyReturnPassPass(Registry);
   initializeEarlyTailDuplicatePass(Registry);
   initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandLargeFpConvertLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/EarlyReturnPass.cpp b/llvm/lib/CodeGen/EarlyReturnPass.cpp
new file mode 100644
index 0000000000000..469081582f990
--- /dev/null
+++ b/llvm/lib/CodeGen/EarlyReturnPass.cpp
@@ -0,0 +1,444 @@
+//===- EarlyReturnPass.cpp - Basic Block Code Layout optimization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Desc HERE
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <cassert>
+#include <iterator>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "early-return"
+
+STATISTIC(NumEarlyReturn, "Number of early return optimization done");
+STATISTIC(NumDeadReturnBlocks, "Number of dead return blocks removed");
+
+namespace {
+
+#define MAX_OPTIMIZE_ATTEMPT 4
+
+class EarlyReturnPass : public MachineFunctionPass {
+  /// BasicBlockInfo - It stores the Offset and size (in bytes) for
+  /// machine basic blocks
+  struct BasicBlockInfo {
+    /// Offset - Distance from the beginning of the function to the beginning
+    /// of this basic block.
+    unsigned Offset = 0;
+
+    /// Size - Size of the basic block in bytes.  If the block contains
+    /// inline assembly, this is a worst case estimate.
+    ///   It does not account for any alignment padding whether from the
+    /// beginning of the block, or from an aligned jump table at the end.
+    unsigned Size = 0;
+
+    BasicBlockInfo() = default;
+  };
+
+  SmallVector<BasicBlockInfo, 16> BlockInfo;
+
+private:
+  MachineFunction *MF = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  SmallVector<MachineBasicBlock *, 8> ReturnBlocks;
+
+  /// Perform the early return for the given branch \p MI
+  /// whose destination block is out of range.
+  bool introduceEarlyReturn(MachineInstr &MI);
+
+  /// Iterate the machine function, initializing the BlockInfo for all blocks
+  /// within it.
+  void initializeBasicBlockInfo();
+
+  /// Creates and return the newly inserted block after \p AfterBB.
+  /// It substitutes out of range \p BranchBB block branching coming
+  /// out from parent \p MBB.
+  MachineBasicBlock *createEarlyReturnMBB(MachineBasicBlock *MBB,
+                                          MachineBasicBlock *BranchBB,
+                                          MachineBasicBlock *AfterBB);
+
+  /// Copies machine instruction from \p SrcBB to \p DestBB,
+  /// along with the live-ins registers.
+  void copyMachineInstrWithLiveness(const MachineBasicBlock &SrcBB,
+                                    MachineBasicBlock *DestBB);
+
+  /// Returns true if the distance between \p MI and
+  /// \p DestBB can fit in MI's displacement field.
+  bool isBlockInRange(const MachineInstr &MI,
+                      const MachineBasicBlock &DestBB) const;
+
+  /// Updates the BlockInfo, starting from \p Start block,
+  /// to accommodate changes due to any newly inserted block.
+  void adjustBlockOffsets(MachineBasicBlock &Start);
+
+  /// Return the current offset of the specified machine
+  /// instruction \p MI from the start of the function.
+  unsigned getInstrOffset(const MachineInstr &MI) const;
+
+public:
+  static char ID;
+
+  EarlyReturnPass() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &mf) override;
+};
+
+} // end anonymous namespace
+
+char EarlyReturnPass::ID = 0;
+
+char &llvm::EarlyReturnPassID = EarlyReturnPass::ID;
+
+INITIALIZE_PASS(EarlyReturnPass, DEBUG_TYPE, "Branch Early Return Block", false,
+                false)
+
+bool EarlyReturnPass::introduceEarlyReturn(MachineInstr &MI) {
+  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  MachineBasicBlock *NewTBB = nullptr, *NewFBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+
+  bool UnAnalyzableBranch = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+  bool NeedEarlyReturnForFBB =
+      FBB && FBB->isReturnBlock() && !isBlockInRange(MI, *FBB);
+
+  // TODO : Currently, the situation like multiple conditional branch
+  // not handled.
+  if (UnAnalyzableBranch) {
+    LLVM_DEBUG(dbgs() << "Branch is unanylazable in "
+                      << printMBBReference(*MBB));
+    return false;
+  }
+
+  // If Cond is non-empty, along with FBB as nullptr, it implies
+  // fall-through is happening via conditional branch. So, NewFBB would be
+  // that very block.
+  //
+  // Hence, NewFBB could be either be fall-through or valid FBB block.
+  if (!FBB && !Cond.empty()) {
+    NewFBB = &(*std::next(MachineFunction::iterator(MBB)));
+  } else {
+    NewFBB = FBB;
+  }
+
+  NewTBB = createEarlyReturnMBB(MBB, TBB, MBB);
+  if (NeedEarlyReturnForFBB) {
+    // If needed NewFBB would hold newly inserted block now.
+    NewFBB = createEarlyReturnMBB(MBB, FBB, NewTBB);
+  }
+
+  // Removing old branch, followed by inserting new branch to newly created
+  // blocks. if FBB is null, then fall-through would work fine.
+  unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+  int RemovedSize = 0;
+  int NewBrSize = 0;
+
+  TII->removeBranch(*MBB, &RemovedSize);
+  if (TBB && !FBB && Cond.empty()) {
+    // Do Nothing, fallthorugh would take care.
+  } else if (TBB && !FBB && !Cond.empty()) {
+    if (!TII->reverseBranchCondition(Cond)) {
+      TII->insertBranch(*MBB, NewFBB, nullptr, Cond, DL, &NewBrSize);
+    } else {
+      TII->insertBranch(*MBB, NewTBB, NewFBB, Cond, DL, &NewBrSize);
+    }
+  } else {
+    assert(TBB && FBB && !Cond.empty());
+    if (!TII->reverseBranchCondition(Cond)) {
+      TII->insertBranch(*MBB, NewFBB, nullptr, Cond, DL, &NewBrSize);
+    } else {
+      TII->insertBranch(*MBB, NewTBB, NewFBB, Cond, DL, &NewBrSize);
+    }
+  }
+
+  BBSize -= RemovedSize;
+  BBSize += NewBrSize;
+
+  // update the block offsets to account for newly created blocks.
+  adjustBlockOffsets(*MBB);
+
+  return true;
+}
+
+void EarlyReturnPass::initializeBasicBlockInfo() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+  ReturnBlocks.clear();
+
+  // First thing, compute the size of all basic blocks, and see if the function
+  // has any inline assembly in it, which would be worst-case scenario.
+  for (MachineBasicBlock &MBB : *MF) {
+    unsigned &MBBSize = BlockInfo[MBB.getNumber()].Size;
+    MBBSize = 0;
+
+    for (const MachineInstr &MI : MBB)
+      MBBSize += TII->getInstSizeInBytes(MI);
+  }
+
+  // Compute block offsets for all blocks in MF.
+  adjustBlockOffsets(*(MF->begin()));
+}
+
+MachineBasicBlock *
+EarlyReturnPass::createEarlyReturnMBB(MachineBasicBlock *MBB,
+                                      MachineBasicBlock *BranchBB,
+                                      MachineBasicBlock *AfterBB) {
+  // Create new block and insert it after AfterBB.
+  MachineBasicBlock *NewBranchBB =
+      MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+  MF->insert(++AfterBB->getIterator(), NewBranchBB);
+
+  assert(MBB->isSuccessor(BranchBB));
+  MBB->replaceSuccessor(BranchBB, NewBranchBB);
+  assert(NewBranchBB->succ_empty());
+
+  // Copies MI into new block and add its entry into BlockInfo.
+  copyMachineInstrWithLiveness(*BranchBB, NewBranchBB);
+  BlockInfo.insert(BlockInfo.begin() + NewBranchBB->getNumber(),
+                   BasicBlockInfo());
+  BlockInfo[NewBranchBB->getNumber()].Size =
+      BlockInfo[BranchBB->getNumber()].Size;
+
+  LLVM_DEBUG(
+      dbgs()
+      << "Copies Machine instructions : Old return block -> New return block\n"
+      << printMBBReference(*BranchBB) << " from "
+      << printMBBReference(*NewBranchBB) << " for " << printMBBReference(*MBB)
+      << " comes after " << printMBBReference(*AfterBB) << '\n');
+
+  return NewBranchBB;
+}
+
+void EarlyReturnPass::copyMachineInstrWithLiveness(
+    const MachineBasicBlock &SrcBB, MachineBasicBlock *DestBB) {
+  for (const MachineInstr &I : SrcBB) {
+    MachineInstr *MI = MF->CloneMachineInstr(&I);
+
+    // Make a copy of the call site info.
+    if (I.isCandidateForCallSiteEntry())
+      MF->copyCallSiteInfo(&I, MI);
+
+    DestBB->insert(DestBB->end(), MI);
+  }
+
+  // Add live-ins from SrcBB to DestBB.
+  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : SrcBB.liveins())
+    DestBB->addLiveIn(LiveIn);
+  DestBB->sortUniqueLiveIns();
+}
+
+bool EarlyReturnPass::isBlockInRange(const MachineInstr &MI,
+                                     const MachineBasicBlock &DestBB) const {
+  int64_t BrOffset = getInstrOffset(MI);
+  int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
+  int64_t distance = DestOffset - BrOffset;
+
+  if (TII->isBranchOffsetInRange(MI.getOpcode(), distance))
+    return true;
+
+  LLVM_DEBUG(dbgs() << "Out of range branch to destination "
+                    << printMBBReference(DestBB) << " from "
+                    << printMBBReference(*MI.getParent()) << " to "
+                    << DestOffset << " offset " << DestOffset - BrOffset << '\t'
+                    << MI);
+
+  return false;
+}
+
+void EarlyReturnPass::adjustBlockOffsets(MachineBasicBlock &Start) {
+  MachineFunction *MF = Start.getParent();
+
+  // Compute the offset immediately following this block. \p MBB is the
+  // block after PrevMBB.
+  auto postOffset = [&](const BasicBlockInfo &PrevMBBInfo,
+                        const MachineBasicBlock &MBB) -> unsigned {
+    const unsigned PO = PrevMBBInfo.Offset + PrevMBBInfo.Size;
+    const Align Alignment = MBB.getAlignment();
+    const Align ParentAlign = MF->getAlignment();
+    if (Alignment <= ParentAlign)
+      return alignTo(PO, Alignment);
+
+    // The alignment of this MBB is larger than the function's alignment, so we
+    // can't tell whether or not it will insert nops. Assume that it will.
+    return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
+  };
+
+  unsigned PrevNum = Start.getNumber();
+  for (auto &MBB :
+       make_range(std::next(MachineFunction::iterator(Start)), MF->end())) {
+    unsigned Num = MBB.getNumber();
+    // Get the offset and known bits at the end of the layout predecessor.
+    // Includes the alignment of the current MBB block.
+    BlockInfo[Num].Offset = postOffset(BlockInfo[PrevNum], MBB);
+    PrevNum = Num;
+  }
+}
+
+unsigned EarlyReturnPass::getInstrOffset(const MachineInstr &MI) const {
+  const MachineBasicBlock *MBB = MI.getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the block
+  // it is in.
+  unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+  // Sum up the instructions before MI in MBB.
+  for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    Offset += TII->getInstSizeInBytes(*I);
+  }
+
+  return Offset;
+}
+
+bool EarlyReturnPass::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  bool MadeChange = false;
+
+  LLVM_DEBUG(dbgs() << "***** Branch Early Return Started*****\n");
+
+  const TargetSubtargetInfo &ST = MF->getSubtarget();
+  TII = ST.getInstrInfo();
+
+  // Renumber all of the machine basic blocks in the function, guaranteeing that
+  // the numbers agree with the position of the block in the function.
+  MF->RenumberBlocks();
+
+  // Initialize the basicBlock information by scanning the MF at start.
+  initializeBasicBlockInfo();
+
+  // Each MBB would require a minimum number of reoptimization
+  // attempt to reach most possible optimized state.
+  // (implicit assumption : branch is analyzable)
+  // <OR> -> Out of Range
+  //
+  // Case -1 : TBB && !FBB && Cond.empty() ->
+  // Max Attempt to solve = 1 {as it eliminates branch in MBB after once.}
+  // Ex: MBB : b TBB
+  //     .......
+  // <OR>TBB :
+  // ==> MBB :
+  //     NewTBB :
+  //
+  // Case -2 : TBB && !FBB && !Cond.empty() ->
+  // Max Attempt to solve = 2 {as it loops back to intial state in worst
+  // case scenario, after third attempt}
+  // Ex: MBB : be TBB
+  //     FBB :
+  // <OR>TBB :
+  // ==> MBB    : bne FBB
+  //     NewTBB :
+  // <OR>FBB    :
+  // ==> MBB    : be NewTBB
+  //     NewFBB :
+  // <OR>NewTBB :
+  //
+  // Case -3 : TBB && FBB && Cond.empty() ->
+  // Max Attempt to solve = 4 {as it loops back to previous state, from
+  // which triggering loop re-eval.}
+  // Ex: MBB : be TBB
+  //         : b  FBB
+  //     .......
+  // <OR>TBB :
+  //     FBB :
+  // ==> MBB    : bne FBB
+  //     NewTBB :
+  //     .......
+  // <OR>FBB    :
+  // ==> MBB    : be NewTBB
+  //     NewFBB :
+  // <OR>NewTBB :
+  //     .......
+  //
+  // This last state is as same as initial state of case-2, implying after
+  // 2 more attempts, it would saturate.
+
+  // Main Logic performing early return block insertion for given machine
+  // function.
+  for (MachineBasicBlock &MBB : *MF) {
+    if (MBB.isReturnBlock()) {
+      ReturnBlocks.push_back(&MBB);
+      continue;
+    }
+
+    unsigned NumAttempt = 0;
+    while (NumAttempt < MAX_OPTIMIZE_ATTEMPT) {
+      MachineBasicBlock::iterator Curr = MBB.getFirstTerminator();
+      if (Curr == MBB.end())
+        break;
+
+      MachineInstr &MI = *Curr;
+      if (!MI.isConditionalBranch() && !MI.isUnconditionalBranch())
+        break;
+
+      MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+      if (DestBB && DestBB->isReturnBlock() && !isBlockInRange(MI, *DestBB)) {
+        if (introduceEarlyReturn(MI)) {
+          MadeChange = true;
+          NumEarlyReturn++;
+        } else {
+          // If unable to introduce early return (due to unanylazable branch),
+          // no benefit of trying it again for MBB.
+          break;
+        }
+      } else {
+        // If no out of range Return block found, no need to attempt anymore.
+        break;
+      }
+
+      NumAttempt++;
+    }
+
+    if (NumAttempt == MAX_OPTIMIZE_ATTEMPT) {
+      LLVM_DEBUG(dbgs() << "Reached the most optimized possible state for "
+                        << printMBBReference(MBB) << '\n');
+    }
+  }
+
+  // Now, check for dead return block, only if any changes were made.
+  if (MadeChange)
+    for (MachineBasicBlock *RBB : ReturnBlocks) {
+      if (RBB->pred_empty() && !RBB->isMachineBlockAddressTaken()) {
+        LLVM_DEBUG(dbgs() << "\nRemoving this block: "
+                          << printMBBReference(*RBB));
+
+        assert(RBB->succ_empty() && "Dead block is not a return block");
+        // Update call site info.
+        for (const MachineInstr &MI : *RBB)
+          if (MI.shouldUpdateCallSiteInfo())
+            MF->eraseCallSiteInfo(&MI);
+
+        // Remove the block.
+        MF->erase(RBB);
+        ++NumDeadReturnBlocks;
+      }
+    }
+
+  BlockInfo.clear();
+  ReturnBlocks.clear();
+
+  LLVM_DEBUG(dbgs() << "***** Branch Early Return Ended*****\n");
+
+  return MadeChange;
+}
\ No newline at end of file

>From fd204e4f48d572a834bc8732de3fc26c68aefc8d Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 6 Aug 2024 17:25:40 +0530
Subject: [PATCH 2/3] [CodeGen] [LIT] Addition of LIT mir testcases for
 pre-commit test.

---
 .../branch-early-return-conditional.mir       | 180 ++++++++++++++++++
 .../branch-early-return-unconditional.mir     | 160 ++++++++++++++++
 2 files changed, 340 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir

diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
new file mode 100644
index 0000000000000..9923bd1aca289
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
@@ -0,0 +1,180 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+
+---
+name:            branch_early_return_conditional
+alignment:       1
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr12' }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body:             |
+  ; CHECK-LABEL: name: branch_early_return_conditional
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.5(0x30000000)
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+  ; CHECK-NEXT:   $sgpr81 = S_MOV_B32 killed $sgpr12
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+  ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+  ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.entry:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+  ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+  ; CHECK-NEXT:   $sgpr81 = S_MOV_B32 killed $sgpr82
+  ; CHECK-NEXT:   $sgpr82 = S_MOV_B32 killed $sgpr83
+  ; CHECK-NEXT:   $sgpr83 = S_MOV_B32 killed $sgpr84
+  ; CHECK-NEXT:   $sgpr84 = S_MOV_B32 killed $sgpr85
+  ; CHECK-NEXT:   $sgpr101 = S_MOV_B32 killed $vcc_lo
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
+  ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+  ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 killed $sgpr5
+  ; CHECK-NEXT:   $sgpr5 = S_MOV_B32 killed $sgpr6
+  ; CHECK-NEXT:   $sgpr6 = S_MOV_B32 killed $sgpr7
+  ; CHECK-NEXT:   $sgpr7 = S_MOV_B32 killed $sgpr8
+  ; CHECK-NEXT:   $sgpr8 = S_MOV_B32 killed $sgpr9
+  ; CHECK-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr10
+  ; CHECK-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr11
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_WAITCNT 3952
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+    liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+    S_WAITCNT 0
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+    $sgpr81 = S_MOV_B32 killed $sgpr12
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+    $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+    S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+  bb.1:
+    liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+    $sgpr81 = S_MOV_B32 killed $sgpr82
+    $sgpr82 = S_MOV_B32 killed $sgpr83
+    $sgpr83 = S_MOV_B32 killed $sgpr84
+    $sgpr84 = S_MOV_B32 killed $sgpr85
+    $sgpr101 = S_MOV_B32 killed $vcc_lo
+
+  bb.2:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    INLINEASM &"v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+    S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+  bb.3:
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    $sgpr4 = S_MOV_B32 killed $sgpr5
+    $sgpr5 = S_MOV_B32 killed $sgpr6
+    $sgpr6 = S_MOV_B32 killed $sgpr7
+    $sgpr7 = S_MOV_B32 killed $sgpr8
+    $sgpr8 = S_MOV_B32 killed $sgpr9
+    $sgpr9 = S_MOV_B32 killed $sgpr10
+    $sgpr10 = S_MOV_B32 killed $sgpr11
+    S_SETPC_B64 $sgpr4_sgpr5
+
+  bb.4:
+    liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+    $sgpr101 = V_READLANE_B32 $vgpr1, 6
+    $sgpr100 = V_READLANE_B32 $vgpr1, 5
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    S_WAITCNT 3952
+    S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
new file mode 100644
index 0000000000000..a17b88cc6cdbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
@@ -0,0 +1,160 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+
+---
+name:            branch_early_return_conditional
+alignment:       1
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr12' }
+machineFunctionInfo:
+  stackPtrOffsetReg: '$sgpr32'
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body:             |
+  ; CHECK-LABEL: name: branch_early_return_conditional
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+  ; CHECK-NEXT:   $sgpr81 = S_MOV_B32 killed $sgpr12
+  ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+  ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+  ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"v_nop_e64\0A  v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
+  ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr82_sgpr83 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+  ; CHECK-NEXT:   $sgpr82 = S_ADD_U32 $sgpr82, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+  ; CHECK-NEXT:   $sgpr83 = S_ADDC_U32 $sgpr83, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr82_sgpr83
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 killed $sgpr5
+  ; CHECK-NEXT:   $sgpr5 = S_MOV_B32 killed $sgpr6
+  ; CHECK-NEXT:   $sgpr6 = S_MOV_B32 killed $sgpr7
+  ; CHECK-NEXT:   $sgpr7 = S_MOV_B32 killed $sgpr8
+  ; CHECK-NEXT:   $sgpr8 = S_MOV_B32 killed $sgpr9
+  ; CHECK-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr10
+  ; CHECK-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr11
+  ; CHECK-NEXT:   $sgpr11 = S_MOV_B32 killed $sgpr12
+  ; CHECK-NEXT:   $sgpr12 = S_MOV_B32 killed $sgpr13
+  ; CHECK-NEXT:   $sgpr13 = S_MOV_B32 killed $sgpr14
+  ; CHECK-NEXT:   $sgpr14 = S_MOV_B32 killed $sgpr15
+  ; CHECK-NEXT:   $sgpr15 = S_MOV_B32 killed $sgpr16
+  ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 killed $sgpr17
+  ; CHECK-NEXT:   $sgpr17 = S_MOV_B32 killed $sgpr18
+  ; CHECK-NEXT:   $sgpr18 = S_MOV_B32 killed $sgpr19
+  ; CHECK-NEXT:   $sgpr19 = S_MOV_B32 killed $sgpr20
+  ; CHECK-NEXT:   $sgpr20 = S_MOV_B32 killed $sgpr21
+  ; CHECK-NEXT:   $sgpr21 = S_MOV_B32 killed $sgpr22
+  ; CHECK-NEXT:   $sgpr22 = S_MOV_B32 killed $sgpr23
+  ; CHECK-NEXT:   $sgpr23 = S_MOV_B32 killed $sgpr24
+  ; CHECK-NEXT:   $sgpr24 = S_MOV_B32 killed $sgpr25
+  ; CHECK-NEXT:   $sgpr25 = S_MOV_B32 killed $sgpr26
+  ; CHECK-NEXT:   $sgpr26 = S_MOV_B32 killed $sgpr27
+  ; CHECK-NEXT:   $sgpr27 = S_MOV_B32 killed $sgpr28
+  ; CHECK-NEXT:   $sgpr28 = S_MOV_B32 killed $sgpr29
+  ; CHECK-NEXT:   $sgpr29 = S_MOV_B32 killed $sgpr30
+  ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_WAITCNT 3952
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.2(0x30000000)
+    liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+    S_WAITCNT 0
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+    BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+    $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+    $sgpr81 = S_MOV_B32 killed $sgpr12
+    $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+    $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+    S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+  bb.1:
+    successors: %bb.4(0x04000000)
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    INLINEASM &"v_nop_e64\0A  v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+    S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+    S_BRANCH %bb.4
+
+  bb.2:
+    liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+    $sgpr4 = S_MOV_B32 killed $sgpr5
+    $sgpr5 = S_MOV_B32 killed $sgpr6
+    $sgpr6 = S_MOV_B32 killed $sgpr7
+    $sgpr7 = S_MOV_B32 killed $sgpr8
+    $sgpr8 = S_MOV_B32 killed $sgpr9
+    $sgpr9 = S_MOV_B32 killed $sgpr10
+    $sgpr10 = S_MOV_B32 killed $sgpr11
+    $sgpr11 = S_MOV_B32 killed $sgpr12
+    $sgpr12 = S_MOV_B32 killed $sgpr13
+    $sgpr13 = S_MOV_B32 killed $sgpr14
+    $sgpr14 = S_MOV_B32 killed $sgpr15
+    $sgpr15 = S_MOV_B32 killed $sgpr16
+    $sgpr16 = S_MOV_B32 killed $sgpr17
+    $sgpr17 = S_MOV_B32 killed $sgpr18
+    $sgpr18 = S_MOV_B32 killed $sgpr19
+    $sgpr19 = S_MOV_B32 killed $sgpr20
+    $sgpr20 = S_MOV_B32 killed $sgpr21
+    $sgpr21 = S_MOV_B32 killed $sgpr22
+    $sgpr22 = S_MOV_B32 killed $sgpr23
+    $sgpr23 = S_MOV_B32 killed $sgpr24
+    $sgpr24 = S_MOV_B32 killed $sgpr25
+    $sgpr25 = S_MOV_B32 killed $sgpr26
+    $sgpr26 = S_MOV_B32 killed $sgpr27
+    $sgpr27 = S_MOV_B32 killed $sgpr28
+    $sgpr28 = S_MOV_B32 killed $sgpr29
+    $sgpr29 = S_MOV_B32 killed $sgpr30
+    S_SETPC_B64 $sgpr4_sgpr5
+
+  bb.4:
+    liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+    $sgpr101 = V_READLANE_B32 $vgpr1, 6
+    $sgpr100 = V_READLANE_B32 $vgpr1, 5
+    $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+    $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+    $exec = S_MOV_B64 killed $sgpr4_sgpr5
+    S_WAITCNT 3952
+    S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...

>From dc67a8c66905e3516a47e47bea60b91b1ec903f1 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 6 Aug 2024 17:48:38 +0530
Subject: [PATCH 3/3] Updated LIT testCase to test for early return pass.

---
 .../branch-early-return-conditional.mir       | 68 +++++++------------
 .../branch-early-return-unconditional.mir     | 37 ++++------
 2 files changed, 37 insertions(+), 68 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
index 9923bd1aca289..2ecab6f30b8a2 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=early-return,branch-relaxation %s -o - | FileCheck %s
 
 ---
 name:            branch_early_return_conditional
@@ -13,7 +13,7 @@ machineFunctionInfo:
 body:             |
   ; CHECK-LABEL: name: branch_early_return_conditional
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.5(0x30000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
   ; CHECK-NEXT:   liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_WAITCNT 0
@@ -28,23 +28,22 @@ body:             |
   ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
   ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
   ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
-  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.entry:
-  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT: bb.1.entry:
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
-  ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
-  ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
-  ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
-  ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
-  ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
+  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_WAITCNT 3952
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
@@ -53,10 +52,10 @@ body:             |
   ; CHECK-NEXT:   $sgpr83 = S_MOV_B32 killed $sgpr84
   ; CHECK-NEXT:   $sgpr84 = S_MOV_B32 killed $sgpr85
   ; CHECK-NEXT:   $sgpr101 = S_MOV_B32 killed $vcc_lo
-  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT:   S_BRANCH %bb.3
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.8:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
@@ -64,16 +63,16 @@ body:             |
   ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.5(0x7c000000)
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   INLINEASM &"v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64\0A    v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
   ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
-  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.4, implicit killed $scc
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.7:
-  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
@@ -85,7 +84,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
   ; CHECK-NEXT:   S_SETPC_B64 $sgpr0_sgpr1
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: bb.4:
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 killed $sgpr5
@@ -96,27 +95,6 @@ body:             |
   ; CHECK-NEXT:   $sgpr9 = S_MOV_B32 killed $sgpr10
   ; CHECK-NEXT:   $sgpr10 = S_MOV_B32 killed $sgpr11
   ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
-  ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-  ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
-  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
-  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
-  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; CHECK-NEXT:   S_WAITCNT 3952
-  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   bb.0.entry:
     successors: %bb.1(0x50000000), %bb.4(0x30000000)
     liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
index a17b88cc6cdbf..1fe84c5e569a0 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=early-return,branch-relaxation %s -o - | FileCheck %s
 
 ---
 name:            branch_early_return_conditional
@@ -13,7 +13,7 @@ machineFunctionInfo:
 body:             |
   ; CHECK-LABEL: name: branch_early_return_conditional
   ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x50000000), %bb.3(0x30000000)
   ; CHECK-NEXT:   liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_WAITCNT 0
@@ -28,25 +28,28 @@ body:             |
   ; CHECK-NEXT:   $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
   ; CHECK-NEXT:   $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
   ; CHECK-NEXT:   S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
-  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.3, implicit killed $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   INLINEASM &"v_nop_e64\0A  v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
   ; CHECK-NEXT:   S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $sgpr82_sgpr83 = S_GETPC_B64 post-instr-symbol <mcsymbol >
-  ; CHECK-NEXT:   $sgpr82 = S_ADD_U32 $sgpr82, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
-  ; CHECK-NEXT:   $sgpr83 = S_ADDC_U32 $sgpr83, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
-  ; CHECK-NEXT:   S_SETPC_B64 $sgpr82_sgpr83
+  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
+  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   S_WAITCNT 3952
+  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 killed $sgpr5
@@ -76,18 +79,6 @@ body:             |
   ; CHECK-NEXT:   $sgpr28 = S_MOV_B32 killed $sgpr29
   ; CHECK-NEXT:   $sgpr29 = S_MOV_B32 killed $sgpr30
   ; CHECK-NEXT:   S_SETPC_B64 $sgpr4_sgpr5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $sgpr101 = V_READLANE_B32 $vgpr1, 6
-  ; CHECK-NEXT:   $sgpr100 = V_READLANE_B32 $vgpr1, 5
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
-  ; CHECK-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
-  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; CHECK-NEXT:   S_WAITCNT 3952
-  ; CHECK-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   bb.0.entry:
     successors: %bb.1(0x50000000), %bb.2(0x30000000)
     liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32



More information about the llvm-commits mailing list