[llvm] [CodeGen] Perform early program return instead of using indirect branch (PR #102127)
Vikash Gupta via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 6 05:15:48 PDT 2024
https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/102127
>From 5b582e36892433c4c471e2b0ee37f1c7329f6ca5 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Thu, 1 Aug 2024 16:51:01 +0530
Subject: [PATCH 1/3] [CodeGen] Perform early program return instead of using
indirect branch
If a long/indirect branch is to a return block, it might be better to
use a short jump over to an early return block rather than expanding
the out of range branch later in branch relaxation. For example :
L0 : condBranch L1
L2 : .....
: .....
... large L2 block ....
: .....
L1 : .....
: return
If L2 is large enough, [condBranch L1] would need branch relaxation.
This can instead be handled relatively cheaply, eliminating the need
for relaxation, by placing an early return block before L2, as shown
below :
L0 : condReverseBranch L2
L1" : .....
: return
L2 : .....
: .....
... large L2 block ....
: .....
L1 : .....
: return
The above example is one of the cases of an analyzable branch in the L0
block, and the current implementation covers only such branches to a
return block.
---
llvm/include/llvm/CodeGen/Passes.h | 4 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/CodeGen.cpp | 1 +
llvm/lib/CodeGen/EarlyReturnPass.cpp | 444 +++++++++++++++++++++++++++
5 files changed, 451 insertions(+)
create mode 100644 llvm/lib/CodeGen/EarlyReturnPass.cpp
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index cafb9781698a2..9b034eb130150 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -279,6 +279,10 @@ namespace llvm {
/// predicating if/else block and insert select at the join point.
extern char &EarlyIfPredicatorID;
+ /// EarlyReturnPass - This pass replaces out-of-range branches to return
+ /// blocks with branches to early return blocks that are well within range.
+ extern char &EarlyReturnPassID;
+
/// This pass performs instruction combining using trace metrics to estimate
/// critical-path and resource depth.
extern char &MachineCombinerID;
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 13be9c11f0107..d6eafb7422978 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -98,6 +98,7 @@ void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&);
void initializeEarlyIfConverterPass(PassRegistry&);
void initializeEarlyIfPredicatorPass(PassRegistry &);
void initializeEarlyMachineLICMPass(PassRegistry&);
+void initializeEarlyReturnPassPass(PassRegistry &);
void initializeEarlyTailDuplicatePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f1607f85c5b31..504e18abebbe8 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_component_library(LLVMCodeGen
DFAPacketizer.cpp
DwarfEHPrepare.cpp
EarlyIfConversion.cpp
+ EarlyReturnPass.cpp
EdgeBundles.cpp
EHContGuardCatchret.cpp
ExecutionDomainFix.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 31fa4c105cef8..f5725e38e8f55 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -38,6 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
+ initializeEarlyReturnPassPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
initializeExpandLargeDivRemLegacyPassPass(Registry);
initializeExpandLargeFpConvertLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/EarlyReturnPass.cpp b/llvm/lib/CodeGen/EarlyReturnPass.cpp
new file mode 100644
index 0000000000000..469081582f990
--- /dev/null
+++ b/llvm/lib/CodeGen/EarlyReturnPass.cpp
@@ -0,0 +1,444 @@
+//===- EarlyReturnPass.cpp - Basic Block Code Layout optimization ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replaces out-of-range branches to return blocks with nearby return copies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <cassert>
+#include <iterator>
+#include <memory>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "early-return"
+
+STATISTIC(NumEarlyReturn, "Number of early return optimization done");
+STATISTIC(NumDeadReturnBlocks, "Number of dead return blocks removed");
+
+namespace {
+
+#define MAX_OPTIMIZE_ATTEMPT 4
+
+class EarlyReturnPass : public MachineFunctionPass {
+ /// BasicBlockInfo - Records the offset and size (in bytes) of a single
+ /// machine basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ unsigned Offset = 0;
+
+ /// Size - Size of the basic block in bytes, computed by summing
+ /// TargetInstrInfo::getInstSizeInBytes over its instructions.
+ /// It does not account for any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size = 0;
+
+ BasicBlockInfo() = default;
+ };
+
+ SmallVector<BasicBlockInfo, 16> BlockInfo; // indexed by MBB number
+
+private:
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ SmallVector<MachineBasicBlock *, 8> ReturnBlocks;
+
+ /// Introduce early return block(s) for the branch \p MI whose destination
+ /// return block is out of range. Returns false if the branch is unanalyzable.
+ bool introduceEarlyReturn(MachineInstr &MI);
+
+ /// Iterate the machine function, initializing the BlockInfo for all blocks
+ /// within it.
+ void initializeBasicBlockInfo();
+
+ /// Creates a new block, inserts it right after \p AfterBB and returns it.
+ /// The new block is a copy of the out-of-range block \p BranchBB and
+ /// replaces it as a successor of \p MBB.
+ MachineBasicBlock *createEarlyReturnMBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *BranchBB,
+ MachineBasicBlock *AfterBB);
+
+ /// Clones every machine instruction of \p SrcBB into \p DestBB,
+ /// and copies the live-in registers of \p SrcBB as well.
+ void copyMachineInstrWithLiveness(const MachineBasicBlock &SrcBB,
+ MachineBasicBlock *DestBB);
+
+ /// Returns true if the distance between \p MI and
+ /// \p DestBB can fit in MI's displacement field.
+ bool isBlockInRange(const MachineInstr &MI,
+ const MachineBasicBlock &DestBB) const;
+
+ /// Recomputes the BlockInfo offsets of all blocks laid out after \p Start,
+ /// e.g. to accommodate a newly inserted block.
+ void adjustBlockOffsets(MachineBasicBlock &Start);
+
+ /// Return the current offset of the specified machine
+ /// instruction \p MI from the start of the function.
+ unsigned getInstrOffset(const MachineInstr &MI) const;
+
+public:
+ static char ID;
+
+ EarlyReturnPass() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &mf) override;
+};
+
+} // end anonymous namespace
+
+char EarlyReturnPass::ID = 0;
+
+char &llvm::EarlyReturnPassID = EarlyReturnPass::ID;
+
+INITIALIZE_PASS(EarlyReturnPass, DEBUG_TYPE, "Branch Early Return Block", false,
+ false)
+
+bool EarlyReturnPass::introduceEarlyReturn(MachineInstr &MI) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ MachineBasicBlock *NewTBB = nullptr, *NewFBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+
+ bool UnAnalyzableBranch = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+ bool NeedEarlyReturnForFBB =
+ FBB && FBB->isReturnBlock() && !isBlockInRange(MI, *FBB);
+
+ // TODO: Situations such as multiple conditional branches are currently
+ // not handled.
+ if (UnAnalyzableBranch) {
+ LLVM_DEBUG(dbgs() << "Branch is unanylazable in "
+ << printMBBReference(*MBB));
+ return false;
+ }
+
+ // A non-empty Cond together with a null FBB means the conditional branch
+ // falls through on the false path, so NewFBB is the block laid out right
+ // after MBB.
+ //
+ // Hence, NewFBB is either the fall-through block or the explicit FBB.
+ if (!FBB && !Cond.empty()) {
+ NewFBB = &(*std::next(MachineFunction::iterator(MBB)));
+ } else {
+ NewFBB = FBB;
+ }
+
+ NewTBB = createEarlyReturnMBB(MBB, TBB, MBB);
+ if (NeedEarlyReturnForFBB) {
+ // NewFBB now refers to the early return copy of FBB.
+ NewFBB = createEarlyReturnMBB(MBB, FBB, NewTBB);
+ }
+
+ // Remove the old branch, then insert a new branch targeting the newly
+ // created blocks. If FBB is null, the fall-through would work fine.
+ unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
+ int RemovedSize = 0;
+ int NewBrSize = 0;
+
+ TII->removeBranch(*MBB, &RemovedSize);
+ if (TBB && !FBB && Cond.empty()) {
+ // Nothing to insert; NewTBB was placed right after MBB, so it falls through.
+ } else if (TBB && !FBB && !Cond.empty()) {
+ if (!TII->reverseBranchCondition(Cond)) {
+ TII->insertBranch(*MBB, NewFBB, nullptr, Cond, DL, &NewBrSize);
+ } else {
+ TII->insertBranch(*MBB, NewTBB, NewFBB, Cond, DL, &NewBrSize);
+ }
+ } else {
+ assert(TBB && FBB && !Cond.empty());
+ if (!TII->reverseBranchCondition(Cond)) {
+ TII->insertBranch(*MBB, NewFBB, nullptr, Cond, DL, &NewBrSize);
+ } else {
+ TII->insertBranch(*MBB, NewTBB, NewFBB, Cond, DL, &NewBrSize);
+ }
+ }
+
+ BBSize -= RemovedSize;
+ BBSize += NewBrSize;
+
+ // Update the block offsets to account for the newly created blocks.
+ adjustBlockOffsets(*MBB);
+
+ return true;
+}
+
+void EarlyReturnPass::initializeBasicBlockInfo() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+ ReturnBlocks.clear();
+
+ // First, compute the size of every basic block by summing the sizes
+ // (in bytes) of its instructions as reported by TII.
+ for (MachineBasicBlock &MBB : *MF) {
+ unsigned &MBBSize = BlockInfo[MBB.getNumber()].Size;
+ MBBSize = 0;
+
+ for (const MachineInstr &MI : MBB)
+ MBBSize += TII->getInstSizeInBytes(MI);
+ }
+
+ // Compute block offsets for all blocks in MF.
+ adjustBlockOffsets(*(MF->begin()));
+}
+
+MachineBasicBlock *
+EarlyReturnPass::createEarlyReturnMBB(MachineBasicBlock *MBB,
+ MachineBasicBlock *BranchBB,
+ MachineBasicBlock *AfterBB) {
+ // Create the new block and insert it right after AfterBB.
+ MachineBasicBlock *NewBranchBB =
+ MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF->insert(++AfterBB->getIterator(), NewBranchBB);
+
+ assert(MBB->isSuccessor(BranchBB));
+ MBB->replaceSuccessor(BranchBB, NewBranchBB);
+ assert(NewBranchBB->succ_empty());
+
+ // Clone BranchBB's contents into the new block and record its BlockInfo.
+ copyMachineInstrWithLiveness(*BranchBB, NewBranchBB);
+ BlockInfo.insert(BlockInfo.begin() + NewBranchBB->getNumber(),
+ BasicBlockInfo());
+ BlockInfo[NewBranchBB->getNumber()].Size =
+ BlockInfo[BranchBB->getNumber()].Size;
+
+ LLVM_DEBUG(
+ dbgs()
+ << "Copies Machine instructions : Old return block -> New return block\n"
+ << printMBBReference(*BranchBB) << " from "
+ << printMBBReference(*NewBranchBB) << " for " << printMBBReference(*MBB)
+ << " comes after " << printMBBReference(*AfterBB) << '\n');
+
+ return NewBranchBB;
+}
+
+void EarlyReturnPass::copyMachineInstrWithLiveness(
+ const MachineBasicBlock &SrcBB, MachineBasicBlock *DestBB) {
+ for (const MachineInstr &I : SrcBB) {
+ MachineInstr *MI = MF->CloneMachineInstr(&I);
+
+ // Carry the call site info over to the clone.
+ if (I.isCandidateForCallSiteEntry())
+ MF->copyCallSiteInfo(&I, MI);
+
+ DestBB->insert(DestBB->end(), MI);
+ }
+
+ // Add SrcBB's live-ins to DestBB, then call sortUniqueLiveIns() on DestBB.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : SrcBB.liveins())
+ DestBB->addLiveIn(LiveIn);
+ DestBB->sortUniqueLiveIns();
+}
+
+bool EarlyReturnPass::isBlockInRange(const MachineInstr &MI,
+ const MachineBasicBlock &DestBB) const {
+ int64_t BrOffset = getInstrOffset(MI);
+ int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
+ int64_t distance = DestOffset - BrOffset; // signed: negative if DestBB is earlier
+
+ if (TII->isBranchOffsetInRange(MI.getOpcode(), distance))
+ return true;
+
+ LLVM_DEBUG(dbgs() << "Out of range branch to destination "
+ << printMBBReference(DestBB) << " from "
+ << printMBBReference(*MI.getParent()) << " to "
+ << DestOffset << " offset " << DestOffset - BrOffset << '\t'
+ << MI);
+
+ return false;
+}
+
+void EarlyReturnPass::adjustBlockOffsets(MachineBasicBlock &Start) {
+ MachineFunction *MF = Start.getParent(); // NOTE(review): shadows member MF
+
+ // Compute the start offset of \p MBB, i.e. the end of the preceding block
+ // (described by PrevMBBInfo) rounded up to MBB's alignment.
+ auto postOffset = [&](const BasicBlockInfo &PrevMBBInfo,
+ const MachineBasicBlock &MBB) -> unsigned {
+ const unsigned PO = PrevMBBInfo.Offset + PrevMBBInfo.Size;
+ const Align Alignment = MBB.getAlignment();
+ const Align ParentAlign = MF->getAlignment();
+ if (Alignment <= ParentAlign)
+ return alignTo(PO, Alignment);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ return alignTo(PO, Alignment) + Alignment.value() - ParentAlign.value();
+ };
+
+ unsigned PrevNum = Start.getNumber();
+ for (auto &MBB :
+ make_range(std::next(MachineFunction::iterator(Start)), MF->end())) {
+ unsigned Num = MBB.getNumber();
+ // Start from the end offset of the layout predecessor and account for
+ // the alignment of the current block.
+ BlockInfo[Num].Offset = postOffset(BlockInfo[PrevNum], MBB);
+ PrevNum = Num;
+ }
+}
+
+unsigned EarlyReturnPass::getInstrOffset(const MachineInstr &MI) const {
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ // The offset is composed of two things: the offset of the instruction's
+ // block from the start of the function, and the offset of the instruction
+ // from the start of that block.
+ unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+ // Add the sizes of the instructions that precede MI in MBB.
+ for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+
+ return Offset;
+}
+
+bool EarlyReturnPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bool MadeChange = false;
+
+ LLVM_DEBUG(dbgs() << "***** Branch Early Return Started*****\n");
+
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Initialize the basic block information by scanning MF once up front.
+ initializeBasicBlockInfo();
+
+ // Each MBB needs only a bounded number of re-optimization attempts
+ // to reach its most optimized state.
+ // (implicit assumption : branch is analyzable)
+ // <OR> -> Out of Range
+ //
+ // Case -1 : TBB && !FBB && Cond.empty() ->
+ // Max Attempt to solve = 1 {as the branch in MBB is eliminated at once.}
+ // Ex: MBB : b TBB
+ // .......
+ // <OR>TBB :
+ // ==> MBB :
+ // NewTBB :
+ //
+ // Case -2 : TBB && !FBB && !Cond.empty() ->
+ // Max Attempt to solve = 2 {as it loops back to the initial state in the
+ // worst case scenario, after the third attempt}
+ // Ex: MBB : be TBB
+ // FBB :
+ // <OR>TBB :
+ // ==> MBB : bne FBB
+ // NewTBB :
+ // <OR>FBB :
+ // ==> MBB : be NewTBB
+ // NewFBB :
+ // <OR>NewTBB :
+ //
+ // Case -3 : TBB && FBB && !Cond.empty() ->
+ // Max Attempt to solve = 4 {after that it loops back to a previous state,
+ // which would only trigger re-evaluation of the same loop.}
+ // Ex: MBB : be TBB
+ // : b FBB
+ // .......
+ // <OR>TBB :
+ // FBB :
+ // ==> MBB : bne FBB
+ // NewTBB :
+ // .......
+ // <OR>FBB :
+ // ==> MBB : be NewTBB
+ // NewFBB :
+ // <OR>NewTBB :
+ // .......
+ //
+ // This last state is the same as the initial state of case-2, implying
+ // that it saturates after 2 more attempts.
+
+ // Main logic: perform early return block insertion for the given machine
+ // function.
+ for (MachineBasicBlock &MBB : *MF) {
+ if (MBB.isReturnBlock()) {
+ ReturnBlocks.push_back(&MBB);
+ continue;
+ }
+
+ unsigned NumAttempt = 0;
+ while (NumAttempt < MAX_OPTIMIZE_ATTEMPT) {
+ MachineBasicBlock::iterator Curr = MBB.getFirstTerminator();
+ if (Curr == MBB.end())
+ break;
+
+ MachineInstr &MI = *Curr;
+ if (!MI.isConditionalBranch() && !MI.isUnconditionalBranch())
+ break;
+
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (DestBB && DestBB->isReturnBlock() && !isBlockInRange(MI, *DestBB)) {
+ if (introduceEarlyReturn(MI)) {
+ MadeChange = true;
+ NumEarlyReturn++;
+ } else {
+ // If unable to introduce early return (due to unanalyzable branch),
+ // there is no benefit in trying it again for MBB.
+ break;
+ }
+ } else {
+ // If no out of range Return block found, no need to attempt anymore.
+ break;
+ }
+
+ NumAttempt++;
+ }
+
+ if (NumAttempt == MAX_OPTIMIZE_ATTEMPT) {
+ LLVM_DEBUG(dbgs() << "Reached the most optimized possible state for "
+ << printMBBReference(MBB) << '\n');
+ }
+ }
+
+ // Now, check for dead return blocks, but only if any changes were made.
+ if (MadeChange)
+ for (MachineBasicBlock *RBB : ReturnBlocks) {
+ if (RBB->pred_empty() && !RBB->isMachineBlockAddressTaken()) {
+ LLVM_DEBUG(dbgs() << "\nRemoving this block: "
+ << printMBBReference(*RBB));
+
+ assert(RBB->succ_empty() && "Dead block is not a return block");
+ // Update call site info.
+ for (const MachineInstr &MI : *RBB)
+ if (MI.shouldUpdateCallSiteInfo())
+ MF->eraseCallSiteInfo(&MI);
+
+ // Remove the block.
+ MF->erase(RBB);
+ ++NumDeadReturnBlocks;
+ }
+ }
+
+ BlockInfo.clear();
+ ReturnBlocks.clear();
+
+ LLVM_DEBUG(dbgs() << "***** Branch Early Return Ended*****\n");
+
+ return MadeChange;
+}
\ No newline at end of file
>From fd204e4f48d572a834bc8732de3fc26c68aefc8d Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 6 Aug 2024 17:25:40 +0530
Subject: [PATCH 2/3] [CodeGen] [LIT] Addition of LIT mir testcases for
pre-commit test.
---
.../branch-early-return-conditional.mir | 180 ++++++++++++++++++
.../branch-early-return-unconditional.mir | 160 ++++++++++++++++
2 files changed, 340 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
new file mode 100644
index 0000000000000..9923bd1aca289
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
@@ -0,0 +1,180 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+
+---
+name: branch_early_return_conditional
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr12' }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body: |
+ ; CHECK-LABEL: name: branch_early_return_conditional
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.5(0x30000000)
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr12
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.entry:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+ ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+ ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr82
+ ; CHECK-NEXT: $sgpr82 = S_MOV_B32 killed $sgpr83
+ ; CHECK-NEXT: $sgpr83 = S_MOV_B32 killed $sgpr84
+ ; CHECK-NEXT: $sgpr84 = S_MOV_B32 killed $sgpr85
+ ; CHECK-NEXT: $sgpr101 = S_MOV_B32 killed $vcc_lo
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+ ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
+ ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+ ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5
+ ; CHECK-NEXT: $sgpr5 = S_MOV_B32 killed $sgpr6
+ ; CHECK-NEXT: $sgpr6 = S_MOV_B32 killed $sgpr7
+ ; CHECK-NEXT: $sgpr7 = S_MOV_B32 killed $sgpr8
+ ; CHECK-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr9
+ ; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10
+ ; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+ ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_WAITCNT 3952
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.4(0x30000000)
+ liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+ S_WAITCNT 0
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ $sgpr81 = S_MOV_B32 killed $sgpr12
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.1:
+ liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+ $sgpr81 = S_MOV_B32 killed $sgpr82
+ $sgpr82 = S_MOV_B32 killed $sgpr83
+ $sgpr83 = S_MOV_B32 killed $sgpr84
+ $sgpr84 = S_MOV_B32 killed $sgpr85
+ $sgpr101 = S_MOV_B32 killed $vcc_lo
+
+ bb.2:
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+ S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+ bb.3:
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ $sgpr4 = S_MOV_B32 killed $sgpr5
+ $sgpr5 = S_MOV_B32 killed $sgpr6
+ $sgpr6 = S_MOV_B32 killed $sgpr7
+ $sgpr7 = S_MOV_B32 killed $sgpr8
+ $sgpr8 = S_MOV_B32 killed $sgpr9
+ $sgpr9 = S_MOV_B32 killed $sgpr10
+ $sgpr10 = S_MOV_B32 killed $sgpr11
+ S_SETPC_B64 $sgpr4_sgpr5
+
+ bb.4:
+ liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+ $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ S_WAITCNT 3952
+ S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
new file mode 100644
index 0000000000000..a17b88cc6cdbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
@@ -0,0 +1,160 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+
+---
+name: branch_early_return_conditional
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr12' }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body: |
+ ; CHECK-LABEL: name: branch_early_return_conditional
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr12
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
+ ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr82_sgpr83 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+ ; CHECK-NEXT: $sgpr82 = S_ADD_U32 $sgpr82, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+ ; CHECK-NEXT: $sgpr83 = S_ADDC_U32 $sgpr83, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr82_sgpr83
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5
+ ; CHECK-NEXT: $sgpr5 = S_MOV_B32 killed $sgpr6
+ ; CHECK-NEXT: $sgpr6 = S_MOV_B32 killed $sgpr7
+ ; CHECK-NEXT: $sgpr7 = S_MOV_B32 killed $sgpr8
+ ; CHECK-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr9
+ ; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10
+ ; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11
+ ; CHECK-NEXT: $sgpr11 = S_MOV_B32 killed $sgpr12
+ ; CHECK-NEXT: $sgpr12 = S_MOV_B32 killed $sgpr13
+ ; CHECK-NEXT: $sgpr13 = S_MOV_B32 killed $sgpr14
+ ; CHECK-NEXT: $sgpr14 = S_MOV_B32 killed $sgpr15
+ ; CHECK-NEXT: $sgpr15 = S_MOV_B32 killed $sgpr16
+ ; CHECK-NEXT: $sgpr16 = S_MOV_B32 killed $sgpr17
+ ; CHECK-NEXT: $sgpr17 = S_MOV_B32 killed $sgpr18
+ ; CHECK-NEXT: $sgpr18 = S_MOV_B32 killed $sgpr19
+ ; CHECK-NEXT: $sgpr19 = S_MOV_B32 killed $sgpr20
+ ; CHECK-NEXT: $sgpr20 = S_MOV_B32 killed $sgpr21
+ ; CHECK-NEXT: $sgpr21 = S_MOV_B32 killed $sgpr22
+ ; CHECK-NEXT: $sgpr22 = S_MOV_B32 killed $sgpr23
+ ; CHECK-NEXT: $sgpr23 = S_MOV_B32 killed $sgpr24
+ ; CHECK-NEXT: $sgpr24 = S_MOV_B32 killed $sgpr25
+ ; CHECK-NEXT: $sgpr25 = S_MOV_B32 killed $sgpr26
+ ; CHECK-NEXT: $sgpr26 = S_MOV_B32 killed $sgpr27
+ ; CHECK-NEXT: $sgpr27 = S_MOV_B32 killed $sgpr28
+ ; CHECK-NEXT: $sgpr28 = S_MOV_B32 killed $sgpr29
+ ; CHECK-NEXT: $sgpr29 = S_MOV_B32 killed $sgpr30
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_WAITCNT 3952
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+ S_WAITCNT 0
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ $sgpr81 = S_MOV_B32 killed $sgpr12
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+ bb.1:
+ successors: %bb.4(0x04000000)
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ INLINEASM &"v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+ S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ S_BRANCH %bb.4
+
+ bb.2:
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ $sgpr4 = S_MOV_B32 killed $sgpr5
+ $sgpr5 = S_MOV_B32 killed $sgpr6
+ $sgpr6 = S_MOV_B32 killed $sgpr7
+ $sgpr7 = S_MOV_B32 killed $sgpr8
+ $sgpr8 = S_MOV_B32 killed $sgpr9
+ $sgpr9 = S_MOV_B32 killed $sgpr10
+ $sgpr10 = S_MOV_B32 killed $sgpr11
+ $sgpr11 = S_MOV_B32 killed $sgpr12
+ $sgpr12 = S_MOV_B32 killed $sgpr13
+ $sgpr13 = S_MOV_B32 killed $sgpr14
+ $sgpr14 = S_MOV_B32 killed $sgpr15
+ $sgpr15 = S_MOV_B32 killed $sgpr16
+ $sgpr16 = S_MOV_B32 killed $sgpr17
+ $sgpr17 = S_MOV_B32 killed $sgpr18
+ $sgpr18 = S_MOV_B32 killed $sgpr19
+ $sgpr19 = S_MOV_B32 killed $sgpr20
+ $sgpr20 = S_MOV_B32 killed $sgpr21
+ $sgpr21 = S_MOV_B32 killed $sgpr22
+ $sgpr22 = S_MOV_B32 killed $sgpr23
+ $sgpr23 = S_MOV_B32 killed $sgpr24
+ $sgpr24 = S_MOV_B32 killed $sgpr25
+ $sgpr25 = S_MOV_B32 killed $sgpr26
+ $sgpr26 = S_MOV_B32 killed $sgpr27
+ $sgpr27 = S_MOV_B32 killed $sgpr28
+ $sgpr28 = S_MOV_B32 killed $sgpr29
+ $sgpr29 = S_MOV_B32 killed $sgpr30
+ S_SETPC_B64 $sgpr4_sgpr5
+
+ bb.4:
+ liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+ $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ S_WAITCNT 3952
+ S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...
>From dc67a8c66905e3516a47e47bea60b91b1ec903f1 Mon Sep 17 00:00:00 2001
From: vg0204 <Vikash.Gupta at amd.com>
Date: Tue, 6 Aug 2024 17:48:38 +0530
Subject: [PATCH 3/3] Update LIT test cases to run the early-return pass.
---
.../branch-early-return-conditional.mir | 68 +++++++------------
.../branch-early-return-unconditional.mir | 37 ++++------
2 files changed, 37 insertions(+), 68 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
index 9923bd1aca289..2ecab6f30b8a2 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-conditional.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=early-return,branch-relaxation %s -o - | FileCheck %s
---
name: branch_early_return_conditional
@@ -13,7 +13,7 @@ machineFunctionInfo:
body: |
; CHECK-LABEL: name: branch_early_return_conditional
; CHECK: bb.0.entry:
- ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.5(0x30000000)
+ ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000)
; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 0
@@ -28,23 +28,22 @@ body: |
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5.entry:
- ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: bb.1.entry:
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
- ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
- ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
- ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
- ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
- ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_WAITCNT 3952
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
@@ -53,10 +52,10 @@ body: |
; CHECK-NEXT: $sgpr83 = S_MOV_B32 killed $sgpr84
; CHECK-NEXT: $sgpr84 = S_MOV_B32 killed $sgpr85
; CHECK-NEXT: $sgpr101 = S_MOV_B32 killed $vcc_lo
- ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: S_BRANCH %bb.3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
@@ -64,16 +63,16 @@ body: |
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.5(0x7c000000)
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.4, implicit killed $scc
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.7:
- ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
@@ -85,7 +84,7 @@ body: |
; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: bb.4:
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5
@@ -96,27 +95,6 @@ body: |
; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10
; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11
; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
- ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
- ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
- ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
- ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: S_WAITCNT 3952
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
bb.0.entry:
successors: %bb.1(0x50000000), %bb.4(0x30000000)
liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
index a17b88cc6cdbf..1fe84c5e569a0 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-early-return-unconditional.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=branch-relaxation %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass=early-return,branch-relaxation %s -o - | FileCheck %s
---
name: branch_early_return_conditional
@@ -13,7 +13,7 @@ machineFunctionInfo:
body: |
; CHECK-LABEL: name: branch_early_return_conditional
; CHECK: bb.0.entry:
- ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000)
; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 0
@@ -28,25 +28,28 @@ body: |
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
- ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:VRegOrLds_32 */, def renamable $sgpr4
; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr82_sgpr83 = S_GETPC_B64 post-instr-symbol <mcsymbol >
- ; CHECK-NEXT: $sgpr82 = S_ADD_U32 $sgpr82, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
- ; CHECK-NEXT: $sgpr83 = S_ADDC_U32 $sgpr83, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
- ; CHECK-NEXT: S_SETPC_B64 $sgpr82_sgpr83
+ ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_WAITCNT 3952
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: bb.3:
; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5
@@ -76,18 +79,6 @@ body: |
; CHECK-NEXT: $sgpr28 = S_MOV_B32 killed $sgpr29
; CHECK-NEXT: $sgpr29 = S_MOV_B32 killed $sgpr30
; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
- ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
- ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
- ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
- ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; CHECK-NEXT: S_WAITCNT 3952
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
bb.0.entry:
successors: %bb.1(0x50000000), %bb.2(0x30000000)
liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
More information about the llvm-commits
mailing list