[llvm] f0ccdde - [AMDGPU] Remove SI_MASK_BRANCH
Ruiling Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 8 17:13:51 PST 2021
Author: Ruiling Song
Date: 2021-03-09T09:13:23+08:00
New Revision: f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6
URL: https://github.com/llvm/llvm-project/commit/f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6
DIFF: https://github.com/llvm/llvm-project/commit/f0ccdde3c9ab23f997a4a30a0e9ab7d9435ec1c6.diff
LOG: [AMDGPU] Remove SI_MASK_BRANCH
SI_MASK_BRANCH is already deprecated, so remove the code that handles it.
Also update the tests to use S_CBRANCH_EXECZ instead of SI_MASK_BRANCH.
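For illustration, the typical pattern in the updated MIR tests changes as in the sketch below; the virtual register %4 and block numbers are representative placeholders, not tied to any one test:

  Before:
    $exec = S_MOV_B64_term %4
    SI_MASK_BRANCH %bb.2, implicit $exec
    S_BRANCH %bb.1

  After:
    $exec = S_MOV_B64_term %4
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

The placeholder terminator is gone, and the skip over the divergent block is expressed directly with the real conditional branch on EXEC == 0.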
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D97545
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index a8cba3f5cc5c..b2bdd4d6d169 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -274,24 +274,9 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
++I;
}
} else {
- // We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
+ // We don't want these pseudo instructions encoded. They are
// placeholder terminator instructions and should only be printed as
// comments.
- if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
- if (isVerbose()) {
- SmallVector<char, 16> BBStr;
- raw_svector_ostream Str(BBStr);
-
- const MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
- const MCSymbolRefExpr *Expr
- = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- Expr->print(Str, MAI);
- OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr);
- }
-
- return;
- }
-
if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
if (isVerbose())
OutStreamer->emitRawComment(" return to shader part epilog");
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 1d291d9433f7..66ab2a6e5abe 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass inserts branches on the 0 exec mask over divergent branches
-/// branches when it's expected that jumping over the untaken control flow will
-/// be cheaper than having every workitem no-op through it.
+/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//
@@ -24,32 +22,21 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
-static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold-legacy",
- cl::desc("Number of instructions before jumping over divergent control flow"),
- cl::init(12), cl::Hidden);
-
namespace {
class SIInsertSkips : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- unsigned SkipThreshold = 0;
MachineDominatorTree *MDT = nullptr;
MachineBasicBlock *EarlyExitBlock = nullptr;
bool EarlyExitClearsExec = false;
- bool shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
-
void ensureEarlyExitBlock(MachineBasicBlock &MBB, bool ClearExec);
void earlyTerm(MachineInstr &MI);
- bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
-
public:
static char ID;
@@ -87,53 +74,6 @@ static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
if (MI.isMetaInstruction())
return true;
- // Handle target specific opcodes.
- switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- return true;
- default:
- return false;
- }
-}
-
-bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(*I))
- continue;
-
- // FIXME: Since this is required for correctness, this should be inserted
- // during SILowerControlFlow.
-
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
return false;
}
@@ -209,29 +149,11 @@ void SIInsertSkips::earlyTerm(MachineInstr &MI) {
MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
}
-// Returns true if a branch over the block was inserted.
-bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
-
- if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
- return false;
-
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
-
- BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(DestBB);
-
- return true;
-}
-
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- SkipThreshold = SkipThresholdFlag;
MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
@@ -246,10 +168,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
- MadeChange |= skipMaskBranch(MI, MBB);
- break;
-
case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5a7ef04ec4f2..f7991d5dcbf0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2369,10 +2369,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Skip over the instructions that are artificially terminators for special
// exec management.
- while (I != E && !I->isBranch() && !I->isReturn() &&
- I->getOpcode() != AMDGPU::SI_MASK_BRANCH) {
+ while (I != E && !I->isBranch() && !I->isReturn()) {
switch (I->getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
case AMDGPU::S_OR_B64_term:
@@ -2400,34 +2398,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (I == E)
return false;
- if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
- return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
-
- ++I;
-
- // TODO: Should be able to treat as fallthrough?
- if (I == MBB.end())
- return true;
-
- if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
- return true;
-
- MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
-
- // Specifically handle the case where the conditional branch is to the same
- // destination as the mask branch. e.g.
- //
- // si_mask_branch BB8
- // s_cbranch_execz BB8
- // s_cbranch BB9
- //
- // This is required to understand divergent loops which may need the branches
- // to be relaxed.
- if (TBB != MaskBrDest || Cond.empty())
- return true;
-
- auto Pred = Cond[0].getImm();
- return (Pred != EXECZ && Pred != EXECNZ);
+ return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
}
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
@@ -2438,11 +2409,6 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
unsigned RemovedSize = 0;
while (I != MBB.end()) {
MachineBasicBlock::iterator Next = std::next(I);
- if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
- I = Next;
- continue;
- }
-
RemovedSize += getInstSizeInBytes(*I);
I->eraseFromParent();
++Count;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 20d591c48b96..796d85fb8af6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -274,19 +274,6 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
-// Dummy terminator instruction to use after control flow instructions
-// replaced with exec mask operations.
-def SI_MASK_BRANCH : VPseudoInstSI <
- (outs), (ins brtarget:$target)> {
- let isBranch = 0;
- let isTerminator = 1;
- let isBarrier = 0;
- let SchedRW = [];
- let hasNoSchedulingInfo = 1;
- let FixedSize = 1;
- let Size = 0;
-}
-
let isTerminator = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
index 62100ae33eaa..6741d4d46a8f 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-extend-pruned-subrange.mir
@@ -39,7 +39,7 @@ body: |
%20:sreg_64 = COPY $exec, implicit-def $exec
%21:sreg_64 = S_AND_B64 %20, %19, implicit-def dead $scc
$exec = S_MOV_B64_term %21
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -73,7 +73,7 @@ body: |
%29:sreg_64 = COPY $exec, implicit-def $exec
%30:sreg_64 = S_AND_B64 %29, %26, implicit-def dead $scc
$exec = S_MOV_B64_term %30
- SI_MASK_BRANCH %bb.10, implicit $exec
+ S_CBRANCH_EXECZ %bb.10, implicit $exec
S_BRANCH %bb.8
bb.8:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
index 95e38af688c4..de14c0304ba0 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
@@ -90,7 +90,7 @@ body: |
%44:sreg_64 = S_AND_B64 %43, %30, implicit-def dead $scc
%45:sreg_64 = S_XOR_B64 %44, %43, implicit-def dead $scc
$exec = S_MOV_B64_term killed %44
- SI_MASK_BRANCH %bb.9, implicit $exec
+ S_CBRANCH_EXECZ %bb.9, implicit $exec
S_BRANCH %bb.8
bb.5:
@@ -122,7 +122,7 @@ body: |
%67:sreg_64 = COPY $exec, implicit-def $exec
%68:sreg_64 = S_AND_B64 %67, %61, implicit-def dead $scc
$exec = S_MOV_B64_term killed %68
- SI_MASK_BRANCH %bb.13, implicit $exec
+ S_CBRANCH_EXECZ %bb.13, implicit $exec
S_BRANCH %bb.7
bb.7:
@@ -198,7 +198,7 @@ body: |
%90:sreg_64 = S_AND_B64 %89, %87, implicit-def dead $scc
%46:sreg_64 = S_XOR_B64 %90, %89, implicit-def dead $scc
$exec = S_MOV_B64_term killed %90
- SI_MASK_BRANCH %bb.5, implicit $exec
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.15
bb.13:
@@ -211,7 +211,7 @@ body: |
%95:sreg_64 = COPY $exec, implicit-def $exec
%96:sreg_64 = S_AND_B64 %95, %93, implicit-def dead $scc
$exec = S_MOV_B64_term killed %96
- SI_MASK_BRANCH %bb.16, implicit $exec
+ S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.14
bb.14:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
index e50b0c835735..4bd73ea682f9 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subregjoin-fullcopy.mir
@@ -83,7 +83,7 @@ body: |
%23:sreg_64 = S_AND_B64 %22, %18, implicit-def dead $scc
%24:sreg_64 = S_XOR_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %23
- SI_MASK_BRANCH %bb.7, implicit $exec
+ S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.18
bb.7:
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
index 0eeb9d4dcc93..01a60108598c 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-with-subregs-bad-identical.mir
@@ -74,7 +74,7 @@ body: |
%23:sreg_64 = COPY $exec, implicit-def $exec
%24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc
$exec = S_MOV_B64_term killed %24
- SI_MASK_BRANCH %bb.7, implicit $exec
+ S_CBRANCH_EXECZ %bb.7, implicit $exec
S_BRANCH %bb.5
bb.5:
@@ -153,7 +153,7 @@ body: |
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
$exec = S_MOV_B64_term killed %51
- SI_MASK_BRANCH %bb.16, implicit $exec
+ S_CBRANCH_EXECZ %bb.16, implicit $exec
S_BRANCH %bb.15
bb.15:
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
index 3569085bdd07..f522c3f30e4b 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf2.mir
@@ -30,7 +30,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_LT_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.4, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -47,7 +47,7 @@ body: |
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
- ; GCN: SI_MASK_BRANCH %bb.3, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.2:
; GCN: successors: %bb.3(0x80000000)
@@ -77,7 +77,7 @@ body: |
%3:sreg_64 = COPY $exec, implicit-def $exec
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -96,7 +96,7 @@ body: |
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
$exec = S_MOV_B64_term %13
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
index c0968b2437ba..77e094ea081e 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
---
@@ -8,7 +8,6 @@ body: |
; CHECK-LABEL: name: skip_execz_flat
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -18,7 +17,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -36,7 +35,6 @@ body: |
; CHECK-LABEL: name: skip_execz_mubuf
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -46,7 +44,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -64,7 +62,6 @@ body: |
; CHECK-LABEL: name: skip_execz_ds
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -74,7 +71,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
index c35a506ba811..5424ad39b4d9 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
---
@@ -9,7 +9,6 @@ body: |
; CHECK-LABEL: name: skip_gws_init
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -19,7 +18,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
@@ -37,7 +36,6 @@ body: |
; CHECK-LABEL: name: skip_gws_barrier
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
@@ -47,7 +45,7 @@ body: |
; CHECK: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
index a0c0a6f20522..928324492d51 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
@@ -1,44 +1,18 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
-
----
-
-# CHECK-LABEL: name: no_count_mask_branch_pseudo
-# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
-# CHECK-NOT: S_CBRANCH_EXECZ
-name: no_count_mask_branch_pseudo
-body: |
- bb.0:
- successors: %bb.1
-
- $vgpr1 = V_MOV_B32_e32 7, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- SI_MASK_BRANCH %bb.3, implicit $exec
-
- bb.2:
- $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-
- bb.3:
- S_ENDPGM 0
-...
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
---
+# FIXME: RemoveShortExecBranches should not count dbg_value instructions.
# CHECK-LABEL: name: no_count_dbg_value
# CHECK: $vgpr1 = V_MOV_B32_e32 7, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
# CHECK-NOT: S_CBRANCH_EXECZ
name: no_count_dbg_value
body: |
bb.0:
- successors: %bb.1
+ successors: %bb.1, %bb.2
$vgpr1 = V_MOV_B32_e32 7, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
bb.1:
successors: %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index 4b01d1552624..faf4e7524778 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -9,25 +9,16 @@ name: loop_header_nopred
body: |
; GCN-LABEL: name: loop_header_nopred
; GCN: bb.0:
- ; GCN: successors: %bb.1(0x80000000)
- ; GCN: S_INST_PREFETCH 1
- ; GCN: S_BRANCH %bb.1
- ; GCN: bb.6 (align 64):
- ; GCN: successors: %bb.7(0x04000000), %bb.1(0x7c000000)
- ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc
- ; GCN: bb.1:
- ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
- ; GCN: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
- ; GCN: bb.3:
- ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
- ; GCN: S_BRANCH %bb.4
- ; GCN: bb.2 (align 64):
- ; GCN: successors: %bb.4(0x40000000), %bb.6(0x40000000)
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
- ; GCN: S_BRANCH %bb.4
- ; GCN: bb.4:
- ; GCN: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.1 (align 64):
+ ; GCN: successors: %bb.7(0x04000000), %bb.2(0x7c000000)
+ ; GCN: S_CBRANCH_VCCNZ %bb.7, implicit $vcc_lo
+ ; GCN: bb.2:
+ ; GCN: successors: %bb.5(0x40000000), %bb.1(0x40000000)
+ ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; GCN: bb.5:
+ ; GCN: successors: %bb.1(0x04000000), %bb.5(0x7c000000)
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
@@ -39,10 +30,8 @@ body: |
; GCN: S_NOP 0
; GCN: S_NOP 0
; GCN: S_NOP 0
- ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
- ; GCN: bb.5:
- ; GCN: successors: %bb.6(0x80000000)
- ; GCN: S_BRANCH %bb.6
+ ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; GCN: S_BRANCH %bb.1
; GCN: bb.7:
; GCN: S_ENDPGM 0
bb.0:
@@ -60,7 +49,7 @@ body: |
bb.3:
successors: %bb.4(0x40000000), %bb.6(0x40000000)
- SI_MASK_BRANCH %bb.6, implicit $exec
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.4
bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 3a19ec60aacb..188d76dd3142 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -86,7 +86,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
$sgpr2_sgpr3 = S_XOR_B64 $exec, killed $sgpr2_sgpr3, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2.exit, implicit $exec
+ S_CBRANCH_EXECZ %bb.2.exit, implicit $exec
bb.1.atomic:
successors: %bb.2.exit(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
index e5a7421dbd5d..ed1caf7bc698 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir
@@ -22,7 +22,7 @@ body: |
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_]]
- ; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
@@ -32,7 +32,7 @@ body: |
; GCN: $exec = S_AND_B64 $exec, [[COPY]], implicit-def dead $scc
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[S_OR_SAVEEXEC_B64_]], implicit-def $scc
; GCN: $exec = S_XOR_B64_term $exec, [[S_AND_B64_1]], implicit-def $scc
- ; GCN: SI_MASK_BRANCH %bb.6, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
; GCN: S_BRANCH %bb.3
; GCN: bb.3:
; GCN: successors: %bb.4(0x40000000), %bb.5(0x40000000)
@@ -40,7 +40,7 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_2:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_2]]
- ; GCN: SI_MASK_BRANCH %bb.5, implicit $exec
+ ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec
; GCN: S_BRANCH %bb.4
; GCN: bb.4:
; GCN: successors: %bb.5(0x80000000)
@@ -60,7 +60,7 @@ body: |
%4:sreg_64 = S_AND_B64 %3, %2, implicit-def dead $scc
%5:sreg_64 = S_XOR_B64 %4, %3, implicit-def dead $scc
$exec = S_MOV_B64_term %4
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -72,7 +72,7 @@ body: |
$exec = S_AND_B64 $exec, %0, implicit-def dead $scc
%7:sreg_64 = S_AND_B64 $exec, %6, implicit-def $scc
$exec = S_XOR_B64_term $exec, %7, implicit-def $scc
- SI_MASK_BRANCH %bb.6, implicit $exec
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -82,7 +82,7 @@ body: |
%9:sreg_64 = COPY $exec, implicit-def $exec
%10:sreg_64 = S_AND_B64 %9, %8, implicit-def dead $scc
$exec = S_MOV_B64_term %10
- SI_MASK_BRANCH %bb.5, implicit $exec
+ S_CBRANCH_EXECZ %bb.5, implicit $exec
S_BRANCH %bb.4
bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
index 9cbbabc361ab..85e308521562 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir
@@ -128,7 +128,7 @@
# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor
liveins:
@@ -143,7 +143,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -166,7 +166,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_and_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec
liveins:
@@ -180,7 +180,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -203,7 +203,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_or_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_or_saveexec
liveins:
@@ -217,7 +217,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_OR_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -243,7 +243,7 @@ body: |
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_valu_middle
liveins:
- { reg: '$vgpr0' }
@@ -258,7 +258,7 @@ body: |
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -283,7 +283,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_wrong_reg
liveins:
- { reg: '$vgpr0' }
@@ -299,7 +299,7 @@ body: |
$sgpr0_sgpr1 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 undef $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr0_sgpr1
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -322,7 +322,7 @@ body: |
# CHECK-NEXT: $sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_and_saveexec_xor_modify_copy_to_exec
liveins:
@@ -338,7 +338,7 @@ body: |
$sgpr2_sgpr3 = S_OR_B64 killed $sgpr2_sgpr3, 1, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -365,7 +365,7 @@ body: |
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_and_saveexec_xor_live_out_setexec
liveins:
- { reg: '$vgpr0' }
@@ -379,7 +379,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -404,7 +404,7 @@ body: |
# CHECK: $sgpr0_sgpr1 = COPY $exec
# CHECK: $sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_unknown_saveexec
liveins:
@@ -418,7 +418,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_LSHR_B64 $sgpr0_sgpr1, killed $vcc_lo, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -441,7 +441,7 @@ body: |
---
# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}}
# CHECK: $sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
-# CHECK-NEXT: SI_MASK_BRANCH
+# CHECK-NEXT: S_CBRANCH_EXECZ
name: optimize_if_andn2_saveexec
liveins:
@@ -455,7 +455,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -479,7 +479,7 @@ body: |
# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}}
# CHECK: $sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
-# CHECK-NEXT: SI_MASK_BRANCH %bb.2, implicit $exec
+# CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
name: optimize_if_andn2_saveexec_no_commute
liveins:
- { reg: '$vgpr0' }
@@ -492,7 +492,7 @@ body: |
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
$sgpr2_sgpr3 = S_ANDN2_B64 killed $vcc, $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
@@ -531,7 +531,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1.if:
diff --git a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
index 70bc5033e8a1..4ec62f8a306a 100644
--- a/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
+++ b/llvm/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -15,7 +15,7 @@ body: |
$vgpr4 = V_AND_B32_e32 1, $vgpr1, implicit $exec
V_CMP_EQ_U32_e32 1, killed $vgpr4, implicit-def $vcc, implicit $exec
$sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
index ffcad230d0d0..37a04c866948 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
@@ -21,14 +21,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
- ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@@ -42,7 +40,6 @@ body: |
; CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
; CHECK: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr2 = V_MOV_B32_e32 8, implicit $exec
@@ -67,7 +64,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@@ -76,7 +73,7 @@ body: |
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@@ -95,7 +92,7 @@ body: |
$vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1, implicit $exec
renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
@@ -120,14 +117,12 @@ body: |
; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
; CHECK: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
- ; CHECK: SI_MASK_BRANCH %bb.1, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.1, implicit $exec
; CHECK: S_BRANCH %bb.3
; CHECK: bb.1:
; CHECK: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: S_BRANCH %bb.4
; CHECK: bb.2:
@@ -139,7 +134,6 @@ body: |
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
; CHECK: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
; CHECK: bb.4:
; CHECK: renamable $vgpr0 = V_MOV_B32_e32 8, implicit $exec
@@ -154,7 +148,7 @@ body: |
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
$sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
renamable $sgpr6_sgpr7 = S_XOR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.2:
@@ -163,7 +157,7 @@ body: |
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.4:
@@ -180,7 +174,7 @@ body: |
renamable $vgpr0 = V_MOV_B32_e32 15, implicit $exec
renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 killed renamable $sgpr6_sgpr7, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
- SI_MASK_BRANCH %bb.4, implicit $exec
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
S_BRANCH %bb.3
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
index a63813caff0e..76e7d73cdf6c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir
@@ -49,7 +49,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.2
bb.2:
@@ -57,7 +57,7 @@ body: |
%0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec
$exec = S_XOR_B64_term $exec, %0, implicit-def $scc
- SI_MASK_BRANCH %bb.3, implicit $exec
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
S_BRANCH %bb.4
bb.3:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
index 53c6139d645b..58aeddd7b3b3 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir
@@ -203,7 +203,7 @@ body: |
%47:sreg_64 = COPY $exec, implicit-def $exec
%48:sreg_64 = S_AND_B64 %47, %46, implicit-def dead $scc
$exec = S_MOV_B64_term %48
- SI_MASK_BRANCH %bb.18, implicit $exec
+ S_CBRANCH_EXECZ %bb.18, implicit $exec
S_BRANCH %bb.16
bb.16:
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 02638a1d7c5f..2bed3e0162c7 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -52,7 +52,7 @@ body: |
SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1:
@@ -117,7 +117,7 @@ body: |
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
- SI_MASK_BRANCH %bb.2, implicit $exec
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
S_BRANCH %bb.1
bb.1: