[llvm] 5073a62 - [MachineBasicBlock] Explicit FT branching param
Anshil Gandhi via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 17 16:12:26 PST 2023
Author: Anshil Gandhi
Date: 2023-01-17T17:12:08-07:00
New Revision: 5073a622a785e8fd542fd15484970a435ef2e3e5
URL: https://github.com/llvm/llvm-project/commit/5073a622a785e8fd542fd15484970a435ef2e3e5
DIFF: https://github.com/llvm/llvm-project/commit/5073a622a785e8fd542fd15484970a435ef2e3e5.diff
LOG: [MachineBasicBlock] Explicit FT branching param
Introduce a parameter in getFallThrough() to optionally
allow returning the fall through basic block in spite of
an explicit branch instruction to it. This parameter is
set to false by default.
Introduce getLogicalFallThrough() which calls
getFallThrough(true) to obtain the block while avoiding
insertion of a jump instruction to its immediate successor.
This patch also reverts the changes made by D134557 and
solves the case where a jump is inserted after another jump
(branch-relax-no-terminators.mir).
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D140790
Added:
llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
Modified:
llvm/include/llvm/CodeGen/MachineBasicBlock.h
llvm/lib/CodeGen/BranchRelaxation.cpp
llvm/lib/CodeGen/MachineBasicBlock.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index f2fc266662b74..1ab24b554f5b5 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -793,10 +793,15 @@ class MachineBasicBlock
/// Return the fallthrough block if the block can implicitly
/// transfer control to the block after it by falling off the end of
- /// it. This should return null if it can reach the block after
- /// it, but it uses an explicit branch to do so (e.g., a table
- /// jump). Non-null return is a conservative answer.
- MachineBasicBlock *getFallThrough();
+ /// it. If an explicit branch to the fallthrough block is not allowed,
+ /// set JumpToFallThrough to be false. Non-null return is a conservative
+ /// answer.
+ MachineBasicBlock *getFallThrough(bool JumpToFallThrough = false);
+
+ /// Return the fallthrough block if the block can implicitly
+ /// transfer control to its successor, whether by a branch or
+ /// a fallthrough. Non-null return is a conservative answer.
+ MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(true); }
/// Return true if the block can implicitly transfer control to the
/// block after it by falling off the end of it. This should return
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index b2431886a011f..016c81dc5aa4b 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -442,7 +442,6 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
MachineBasicBlock *MBB = MI.getParent();
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
unsigned OldBrSize = TII->getInstSizeInBytes(MI);
MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
@@ -456,20 +455,6 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
MachineBasicBlock *BranchBB = MBB;
- auto RemoveBranch = [&](MachineBasicBlock *MBB) {
- unsigned &BBSize = BlockInfo[MBB->getNumber()].Size;
- int RemovedSize = 0;
- TII->removeBranch(*MBB, &RemovedSize);
- BBSize -= RemovedSize;
- };
-
- auto InsertUncondBranch = [&](MachineBasicBlock *MBB,
- MachineBasicBlock *Dst) {
- TII->insertUnconditionalBranch(*MBB, Dst, DebugLoc());
- // Recalculate the block size.
- BlockInfo[MBB->getNumber()].Size = computeBlockSize(*MBB);
- };
-
// If this was an expanded conditional branch, there is already a single
// unconditional branch in a block.
if (!MBB->empty()) {
@@ -511,13 +496,10 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
// Fall through only if PrevBB has no unconditional branch as one of its
// terminators.
- if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
- report_fatal_error("Could not analyze terminators.");
- if (!FBB) {
- if (!Cond.empty() && TBB && TBB == DestBB)
- RemoveBranch(PrevBB);
- if (!TBB || (TBB && !Cond.empty()))
- InsertUncondBranch(PrevBB, DestBB);
+ if (auto *FT = PrevBB->getLogicalFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
}
// Now, RestoreBB could be placed directly before DestBB.
MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 499ff2401aca2..5ef377f2a1c0e 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -944,7 +944,7 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
return Successors.size() == 1 ? Successors[0] : nullptr;
}
-MachineBasicBlock *MachineBasicBlock::getFallThrough() {
+MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
@@ -975,8 +975,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough() {
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
- if (MachineFunction::iterator(TBB) == Fallthrough ||
- MachineFunction::iterator(FBB) == Fallthrough)
+ if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough))
return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
new file mode 100644
index 0000000000000..6f35fd0fedb8a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
@@ -0,0 +1,180 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s
+
+---
+name: branch_no_terminators
+alignment: 1
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr12' }
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+body: |
+ ; CHECK-LABEL: name: branch_no_terminators
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.5(0x30000000)
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr12
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.1, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.entry:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+ ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+ ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr82
+ ; CHECK-NEXT: $sgpr82 = S_MOV_B32 killed $sgpr83
+ ; CHECK-NEXT: $sgpr83 = S_MOV_B32 killed $sgpr84
+ ; CHECK-NEXT: $sgpr84 = S_MOV_B32 killed $sgpr85
+ ; CHECK-NEXT: $sgpr101 = S_MOV_B32 killed $vcc_lo
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+ ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32_XEXEC */, def renamable $sgpr4
+ ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
+ ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc
+ ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(<unknown target flag>) <mcsymbol >, implicit-def $scc, implicit $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5
+ ; CHECK-NEXT: $sgpr5 = S_MOV_B32 killed $sgpr6
+ ; CHECK-NEXT: $sgpr6 = S_MOV_B32 killed $sgpr7
+ ; CHECK-NEXT: $sgpr7 = S_MOV_B32 killed $sgpr8
+ ; CHECK-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr9
+ ; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10
+ ; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
+ ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_WAITCNT 3952
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
+ bb.0.entry:
+ successors: %bb.1(0x50000000), %bb.4(0x30000000)
+ liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32
+
+ S_WAITCNT 0
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0
+ $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0
+ $sgpr81 = S_MOV_B32 killed $sgpr12
+ $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0
+ $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1
+ S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+
+ bb.1:
+ liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1
+ $sgpr81 = S_MOV_B32 killed $sgpr82
+ $sgpr82 = S_MOV_B32 killed $sgpr83
+ $sgpr83 = S_MOV_B32 killed $sgpr84
+ $sgpr84 = S_MOV_B32 killed $sgpr85
+ $sgpr101 = S_MOV_B32 killed $vcc_lo
+
+ bb.2:
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4
+ S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+
+ bb.3:
+ liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
+
+ $sgpr4 = S_MOV_B32 killed $sgpr5
+ $sgpr5 = S_MOV_B32 killed $sgpr6
+ $sgpr6 = S_MOV_B32 killed $sgpr7
+ $sgpr7 = S_MOV_B32 killed $sgpr8
+ $sgpr8 = S_MOV_B32 killed $sgpr9
+ $sgpr9 = S_MOV_B32 killed $sgpr10
+ $sgpr10 = S_MOV_B32 killed $sgpr11
+ S_SETPC_B64 $sgpr4_sgpr5
+
+ bb.4:
+ liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
+
+ $sgpr101 = V_READLANE_B32 $vgpr1, 6
+ $sgpr100 = V_READLANE_B32 $vgpr1, 5
+ $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec
+ $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec
+ $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ S_WAITCNT 3952
+ S_SETPC_B64_return undef $sgpr30_sgpr31
+
+...
More information about the llvm-commits
mailing list