[llvm] f0ccdde - [AMDGPU] Remove SI_MASK_BRANCH
Ruiling Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 8 17:13:51 PST 2021
Author: Ruiling Song
Date: 2021-03-09T09:13:23+08:00
New Revision: f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6
URL: https://github.com/llvm/llvm-project/commit/f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6
DIFF: https://github.com/llvm/llvm-project/commit/f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6.diff
LOG: [AMDGPU] Remove SI_MASK_BRANCH
SI_MASK_BRANCH is already deprecated, so remove the code that handles it.
Also update the tests to use S_CBRANCH_EXECZ instead of SI_MASK_BRANCH.
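For illustration, the typical pattern in the updated MIR tests changes as in the sketch below; the virtual register %4 and block numbers are representative placeholders, not tied to any one test:

  Before:
    $exec = S_MOV_B64_term %4
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  After:
    $exec = S_MOV_B64_term %4
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

The placeholder terminator is gone, and the skip over the divergent block is expressed directly with the real conditional branch on EXEC == 0.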
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D97545
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index a8cba3f5cc5c..b2bdd4d6d169 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -274,24 +274,9 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
++I;
}
} else {
- // We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
+ // We don't want these pseudo instructions encoded. They are
// placeholder terminator instructions and should only be printed as
// comments.
- if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
- if (isVerbose()) {
- SmallVector<char, 16> BBStr;
- raw_svector_ostream Str(BBStr);
-
- const MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
- const MCSymbolRefExpr *Expr
- = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- Expr->print(Str, MAI);
- OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr);
- }
-
- return;
- }
-
if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
if (isVerbose())
OutStreamer->emitRawComment(" return to shader part epilog");
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 1d291d9433f7..66ab2a6e5abe 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass inserts branches on the 0 exec mask over divergent branches
-/// branches when it's expected that jumping over the untaken control flow will
-/// be cheaper than having every workitem no-op through it.
+/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//
@@ -24,32 +22,21 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
-static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold-legacy",
- cl::desc("Number of instructions before jumping over divergent control flow"),
- cl::init(12), cl::Hidden);
-
namespace {
class SIInsertSkips : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- unsigned SkipThreshold = 0;
MachineDominatorTree *MDT = nullptr;
MachineBasicBlock *EarlyExitBlock = nullptr;
bool EarlyExitClearsExec = false;
- bool shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
-
void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec);
void earlyTerm(MachineInstr &MI);
- bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
-
public:
static char ID;
@@ -87,53 +74,6 @@ static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
if (MI.isMetaInstruction())
return true;
- // Handle target specific opcodes.
- switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- return true;
- default:
- return false;
- }
-}
-
-bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(*I))
- continue;
-
- // FIXME: Since this is required for correctness, this should be inserted
- // during SILowerControlFlow.
-
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
return false;
}
@@ -209,29 +149,11 @@ void SIInsertSkips::earlyTerm(MachineInstr &MI) {
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
}
-// Returns true if a branch over the block was inserted.
-bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
-
- if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
- return false;
-
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
-
- BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(DestBB);
-
- return true;
-}
-
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- SkipThreshold = SkipThresholdFlag;
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -246,10 +168,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- MadeChange |= skipMaskBranch(MI, MBB);
- break;
-
case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5a7ef04ec4f2..f7991d5dcbf0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2369,10 +2369,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Skip over the instructions that are artificially terminators for special
// exec management.
- while (I != E && !I->isBranch() && !I->isReturn() &&
- I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
+ while (I != E && !I->isBranch() && !I->isReturn()) {
switch (I->getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
case AMDGPU::S_OR_B64_term:
@@ -2400,34 +2398,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (I == E)
return false;
- if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
- return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
-
- ++I;
-
- // TODO: Should be able to treat as fallthrough?
- if (I == MBB.end())
- return true;
-
- if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
- return true;
-
- MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
-
- // Specifically handle the case where the conditional branch is to the same
- // destination as the mask branch. e.g.
- //
- // si_mask_branch BB8
- // s_cbranch_execz BB8
- // s_cbranch BB9
- //
- // This is required to understand divergent loops which may need the branches
- // to be relaxed.
- if (TBB != MaskBrDest || Cond.empty())
- return true;
-
- auto Pred = Cond[0].getImm();
- return (Pred != EXECZ && Pred != EXECNZ);
+ return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
}
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
@@ -2438,11 +2409,6 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
unsigned RemovedSize = 0;
while (I != MBB.end()) {
MachineBasicBlock::iterator Next = std::next(I);
- if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
- I = Next;
- continue;
- }
-
RemovedSize += getInstSizeInBytes(*I);
I->eraseFromParent();
++Count;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 20d591c48b96..796d85fb8af6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -274,19 +274,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
-// Dummy terminator instruction to use after control flow instructions
-// replaced with exec mask operations.
-def SI_MASK_BRANCH : VPseudoInstSI <
- (outs), (ins brtarget:$target)> {
- let isBranch = 0;
- let isTerminator = 1;
- let isBarrier = 0;
- let SchedRW = [];
- let hasNoSchedulingInfo = 1;
- let FixedSize = 1;
- let Size = 0;
-}
-
let isTerminator = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
index 62100ae33eaa..6741d4d46a8f 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
@@ -39,7 +39,7 @@ body: |
%20:sreg_64 = COPY $exec, implicit-def $exec
%21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc
$exec = S_MOV_B64_term %21
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -73,7 +73,7 @@ body: |
%29:sreg_64 = COPY $exec, implicit-def $exec
%30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc
$exec = S_MOV_B64_term %30
- SI_MASK_BRANCH %bb.10, implicit $exec
+ S_CBRANCH_EXECZ %bb.10, implicit $exec
S_BRANCH %bb.8
bb.8:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
index 95e38af688c4..de14c0304ba0 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
@@ -90,7 +90,7 @@ body: |
%44:sreg_64 = S_AND_B64 %43, %30, implicit-def dead $scc
%45:sreg_64 = S_XOR_B64 %44, %43, implicit-def dead $scc
$exec = S_MOV_B64_term killed %44
- SI_MASK_BRANCH %bb.9, implicit $exec
+ S_CBRANCH_EXECZ %bb.9, implicit $exec
S_BRANCH %bb.8
bb.5:
@@ -122,7 +122,7 @@ body: |
%67:sreg_64 = COPY $exec, implicit-def $exec
%68:sreg_64 = S_AND_B64 %67, %61, implicit-def dead $scc
$exec = S_MOV_B64_term killed %68
- SI_MASK_BRANCH %bb.13, implicit $exec
+ S_CBRANCH_EXECZ %bb.13, implicit $exec
S_BRANCH %bb.7
bb.7:
@@ -198,7 +198,7 @@ body: |
%90:sreg_64 = S_AND_B64 %89, %87, implicit-def dead $scc
%46:sreg_64 = S_XOR_B64 %90, %89, implicit-def dead $scc
$exec = S_MOV_B64_term killed %90
- SI_MASK_BRANCH %bb.5, implicit $exec
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.15
bb.13:
@@ -211,7 +211,7 @@ body: |
%95:sreg_64 = COPY $exec, implicit-def $exec
%96:sreg_64 = S_AND_B64 %95, %93, implicit-def dead $scc
$exec = S_MOV_B64_term killed %96
- SI_MASK_BRANCH %bb.16, implicit $exec
+ S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.14
bb.14:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
index e50b0c835735..4bd73ea682f9 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
@@ -83,7 +83,7 @@ body: |
%23:sreg_64 = S_AND_B64 %22, %18, implicit-def dead $scc
%24:sreg_64 = S_XOR_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %23
- SI_MASK_BRANCH %bb.7, implicit $exec
+ S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.18
bb.7:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
index 0eeb9d4dcc93..01a60108598c 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
@@ -74,7 +74,7 @@ body: |
%23:sreg_64 = COPY $exec, implicit-def $exec
%24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %24
- SI_MASK_BRANCH %bb.7, implicit $exec
+ S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.5
bb.5:
@@ -153,7 +153,7 @@ body: |
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
$exec = S_MOV_B64_term killed %51
- SI_MASK_BRANCH %bb.16, implicit $exec
+ S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.15
bb.15:
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
index 3569085bdd07..f522c3f30e4b 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
@@ -30,7 +30,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -47,7 +47,7 @@ body: |
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
- ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.2:
; GCN: successors: %bb.3(0x80000000)
@@ -77,7 +77,7 @@ body: |
%3:sreg_64 = COPY $exec, implicit-def $exec
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -96,7 +96,7 @@ body: |
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
$exec = S_MOV_B64_term %13
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
index c0968b2437ba..77e094ea081e 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
---
@@ -8,7 +8,6 @@ body: |
; CHECK-LABEL: name: skip_execz_flat
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -18,7 +17,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -36,7 +35,6 @@ body: |
; CHECK-LABEL: name: skip_execz_mubuf
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -46,7 +44,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -64,7 +62,6 @@ body: |
; CHECK-LABEL: name: skip_execz_ds
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -74,7 +71,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
index c35a506ba811..5424ad39b4d9 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
---
@@ -9,7 +9,6 @@ body: |
; CHECK-LABEL: name: skip_gws_init
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -19,7 +18,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -37,7 +36,6 @@ body: |
; CHECK-LABEL: name: skip_gws_barrier
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -47,7 +45,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
index a0c0a6f20522..928324492d51 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
@@ -1,44 +1,18 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
-
----
-
-# CHECK-LABEL: name: no_count_mask_branch_pseudo
-# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
-# CHECK-NOT: S_CBRANCH_EXECZ
-name: no_count_mask_branch_pseudo
-body: |
- bb.0:
- successors: %bb.1
-
- $vgpr1 = V_MOV_B32_e32 7, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- SI_MASK_BRANCH %bb.3, implicit $exec
-
- bb.2:
- $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-
- bb.3:
- S_ENDPGM 0
-...
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
---
+# FIXME: RemoveShortExecBranches should not count dbg_value instructions.
# CHECK-LABEL: name: no_count_dbg_value
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NOT: S_CBRANCH_EXECZ
name: no_count_dbg_value
body: |
bb.0:
- successors: %bb.1
+ successors: %bb.1, %bb.2
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index 4b01d1552624..faf4e7524778 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -9,25 +9,16 @@ name: loop_header_nopred
body: |
; GCN-LABEL: name: loop_header_nopred
; GCN: bb.0:
- ; GCN: successors: %bb.1(0x80000000)
- ; GCN: S_INST_PREFETCH 1
- ; GCN: S_BRANCH %bb.1
- ; GCN: bb.6 (align 64):
- ; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
- ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
- ; GCN: bb.1:
- ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
- ; GCN: bb.3:
- ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
- ; GCN: S_BRANCH %bb.4
- ; GCN: bb.2 (align 64):
- ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
- ; GCN: S_BRANCH %bb.4
- ; GCN: bb.4:
- ; GCN: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.1 (align 64):
+ ; GCN: successors: %bb.7(0x04000000), %bb.2(0x7c000000)
+ ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+ ; GCN: bb.2:
+ ; GCN: successors: %bb.5(0x40000000), %bb.1(0x40000000)
+ ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
@@ -39,10 +30,8 @@ body: |
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
- ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
- ; GCN: bb.5:
- ; GCN: successors: %bb.6(0x80000000)
- ; GCN: S_BRANCH %bb.6
+ ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; GCN: S_BRANCH %bb.1
; GCN: bb.7:
; GCN: S_ENDPGM 0
bb.0:
@@ -60,7 +49,7 @@ body: |
bb.3:
successors: %bb.4(0x40000000), %bb.6(0x40000000)
- SI_MASK_BRANCH %bb.6, implicit $exec
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.4
bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 3a19ec60aacb..188d76dd3142 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -86,7 +86,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
$sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2.exit, implicit $exec
+ S_CBRANCH_EXECZ %bb.2.exit, implicit $exec
bb.1.atomic:
successors: %bb.2.exit(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
index e5a7421dbd5d..ed1caf7bc698 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
@@ -22,7 +22,7 @@ body: |
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
@@ -32,7 +32,7 @@ body: |
; GCN: $exec = S_AND_B64 $exec, [[COPY]], implicit-def dead $scc
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
; GCN: S_BRANCH %bb.3
; GCN: bb.3:
; GCN: successors: %bb.4(0x40000000), %bb.5(0x40000000)
@@ -40,7 +40,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_2]]
- ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
; GCN: S_BRANCH %bb.4
; GCN: bb.4:
; GCN: successors: %bb.5(0x80000000)
@@ -60,7 +60,7 @@ body: |
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
%5:sreg_64 = S_XOR_B64 %4, %3, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -72,7 +72,7 @@ body: |
$exec = S_AND_B64 $exec, %0, implicit-def dead $scc
%7:sreg_64 = S_AND_B64 $exec, %6, implicit-def $scc
$exec = S_XOR_B64_term $exec, %7, implicit-def $scc
- SI_MASK_BRANCH %bb.6, implicit $exec
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -82,7 +82,7 @@ body: |
%9:sreg_64 = COPY $exec, implicit-def $exec
%10:sreg_64 = S_AND_B64 %9, %8, implicit-def dead $scc
$exec = S_MOV_B64_term %10
- SI_MASK_BRANCH %bb.5, implicit $exec
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.4
bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
index 9cbbabc361ab..85e308521562 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
@@ -128,7 +128,7 @@
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor
liveins:
@@ -143,7 +143,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -166,7 +166,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec
liveins:
@@ -180,7 +180,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -203,7 +203,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_or_saveexec
liveins:
@@ -217,7 +217,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -243,7 +243,7 @@ body: |
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_valu_middle
liveins:
- { reg: '$vgpr0' }
@@ -258,7 +258,7 @@ body: |
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -283,7 +283,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_wrong_reg
liveins:
- { reg: '$vgpr0' }
@@ -299,7 +299,7 @@ body: |
$sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr0_sgpr1
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -322,7 +322,7 @@ body: |
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_modify_copy_to_exec
liveins:
@@ -338,7 +338,7 @@ body: |
$sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -365,7 +365,7 @@ body: |
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_live_out_setexec
liveins:
- { reg: '$vgpr0' }
@@ -379,7 +379,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -404,7 +404,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_unknown_saveexec
liveins:
@@ -418,7 +418,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -441,7 +441,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_andn2_saveexec
liveins:
@@ -455,7 +455,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -479,7 +479,7 @@ body: |
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_andn2_saveexec_no_commute
liveins:
- { reg: '$vgpr0' }
@@ -492,7 +492,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -531,7 +531,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
index 70bc5033e8a1..4ec62f8a306a 100644
--- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -15,7 +15,7 @@ body: |
$vgpr4 = V_AND_B32_e32 1, $vgpr1, implicit $exec
V_CMP_EQ_U32_e32 1, killed $vgpr4, implicit-def $vcc, implicit $exec
$sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
index ffcad230d0d0..37a04c866948 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
@@ -21,14 +21,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
- ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@@ -42,7 +40,6 @@ body: |
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
@@ -67,7 +64,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@@ -76,7 +73,7 @@ body: |
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@@ -95,7 +92,7 @@ body: |
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -120,14 +117,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
- ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@@ -139,7 +134,6 @@ body: |
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
@@ -154,7 +148,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@@ -163,7 +157,7 @@ body: |
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@@ -180,7 +174,7 @@ body: |
renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
index a63813caff0e..76e7d73cdf6c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
@@ -49,7 +49,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
@@ -57,7 +57,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.4
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index 53c6139d645b..58aeddd7b3b3 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -203,7 +203,7 @@ body: |
%47:sreg_64 = COPY $exec, implicit-def $exec
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
$exec = S_MOV_B64_term %48
- SI_MASK_BRANCH %bb.18, implicit $exec
+ S_CBRANCH_EXECZ %bb.18, implicit $exec
S_BRANCH %bb.16
bb.16:
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 02638a1d7c5f..2bed3e0162c7 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -52,7 +52,7 @@ body: |
SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -117,7 +117,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1: