[llvm] 0576f43 - AMDGPU: Don't sometimes allow instructions before lowered si_end_cf

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 18 10:43:08 PDT 2020


Author: Matt Arsenault
Date: 2020-09-18T13:43:01-04:00
New Revision: 0576f436e577cede25810729aef236ec8c649446

URL: https://github.com/llvm/llvm-project/commit/0576f436e577cede25810729aef236ec8c649446
DIFF: https://github.com/llvm/llvm-project/commit/0576f436e577cede25810729aef236ec8c649446.diff

LOG: AMDGPU: Don't sometimes allow instructions before lowered si_end_cf

Since 6524a7a2b9ca072bd7f7b4355d1230e70c679d2f, the or to exec would
sometimes not be emitted at the beginning of the block, where it really
has to be. If an instruction in the block defines one of the source
operands, split the block and turn the si_end_cf into a terminator.

This avoids regressions when regalloc fast is switched to inserting
reloads at the beginning of the block, instead of spills at the end of
the block.

In a future change, this should always split the block.
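
As a rough illustration (a sketch drawn from the test update in
lower-control-flow-other-terminators.mir below; the %mask and %saved
names are placeholders), the lowered form changes like this when other
instructions follow the si_end_cf in the same block:

  Before, the or to exec was a regular instruction in the middle of the
  block:

    bb.2:
      %mask:sreg_64_xexec = COPY %saved
      $exec = S_OR_B64 $exec, killed %mask, implicit-def $scc
      S_SLEEP 1

  After, the or becomes a terminator and the trailing instructions are
  split into a new block:

    bb.2:
      %mask:sreg_64_xexec = COPY %saved
      $exec = S_OR_B64_term $exec, killed %mask, implicit-def $scc
    bb.3:
      S_SLEEP 1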

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
    llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
    llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 21ad82d54661..7f06d545602f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1659,7 +1659,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     // register allocation.
     MI.setDesc(get(AMDGPU::S_XOR_B32));
     break;
-
+  case AMDGPU::S_OR_B64_term:
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(get(AMDGPU::S_OR_B64));
+    break;
   case AMDGPU::S_OR_B32_term:
     // This is only a terminator to get the correct spill code placement during
     // register allocation.
@@ -2236,6 +2240,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
     case AMDGPU::SI_MASK_BRANCH:
     case AMDGPU::S_MOV_B64_term:
     case AMDGPU::S_XOR_B64_term:
+    case AMDGPU::S_OR_B64_term:
     case AMDGPU::S_ANDN2_B64_term:
     case AMDGPU::S_MOV_B32_term:
     case AMDGPU::S_XOR_B32_term:

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 47b27d63408d..51918e3cde94 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -264,6 +264,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
 let WaveSizePredicate = isWave64 in {
 def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
 def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
+def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
 def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
 }
 

diff  --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 914668f2b68a..2d31c3444937 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -99,6 +99,7 @@ class SILowerControlFlow : public MachineFunctionPass {
   unsigned MovTermOpc;
   unsigned Andn2TermOpc;
   unsigned XorTermrOpc;
+  unsigned OrTermrOpc;
   unsigned OrSaveExecOpc;
   unsigned Exec;
 
@@ -106,7 +107,10 @@ class SILowerControlFlow : public MachineFunctionPass {
   void emitElse(MachineInstr &MI);
   void emitIfBreak(MachineInstr &MI);
   void emitLoop(MachineInstr &MI);
-  void emitEndCf(MachineInstr &MI);
+
+  MachineBasicBlock *splitBlock(MachineInstr &MI, MachineBasicBlock *BB,
+                                LiveIntervals *LIS);
+  MachineBasicBlock *emitEndCf(MachineInstr &MI);
 
   void findMaskOperands(MachineInstr &MI, unsigned OpNo,
                         SmallVectorImpl<MachineOperand> &Src) const;
@@ -115,7 +119,7 @@ class SILowerControlFlow : public MachineFunctionPass {
 
   bool removeMBBifRedundant(MachineBasicBlock &MBB);
 
-  void process(MachineInstr &MI);
+  MachineBasicBlock *process(MachineInstr &MI);
 
   // Skip to the next instruction, ignoring debug instructions, and trivial
   // block boundaries (blocks that have one (typically fallthrough) successor,
@@ -489,19 +493,73 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
   } while (true);
 }
 
-void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
+MachineBasicBlock *SILowerControlFlow::splitBlock(MachineInstr &MI,
+                                                  MachineBasicBlock *BB,
+                                                  LiveIntervals *LIS) {
+  MachineBasicBlock::iterator SplitPoint(&MI);
+  ++SplitPoint;
+
+  if (SplitPoint == BB->end()) {
+    // Don't bother with a new block.
+    return BB;
+  }
+
+  // Make sure we add any physregs we define in the block as liveins to the new
+  // block.
+  LivePhysRegs LiveRegs(*TRI);
+  LiveRegs.addLiveOuts(*BB);
+  for (auto I = BB->rbegin(), E = SplitPoint.getReverse(); I != E; ++I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = BB->getParent();
+  MachineBasicBlock *SplitBB
+    = MF->CreateMachineBasicBlock(BB->getBasicBlock());
+
+  MF->insert(++MachineFunction::iterator(BB), SplitBB);
+  SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
+
+  SplitBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(SplitBB);
+
+  addLiveIns(*SplitBB, LiveRegs);
+
+  if (LIS)
+    LIS->insertMBBInMaps(SplitBB, &MI);
+
+  return SplitBB;
+}
+
+MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
-  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  Register CFMask = MI.getOperand(0).getReg();
-  MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
   const DebugLoc &DL = MI.getDebugLoc();
 
-  MachineBasicBlock::iterator InsPt =
-      Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
-                               : MBB.begin();
-  MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
-                            .addReg(Exec)
-                            .add(MI.getOperand(0));
+  MachineBasicBlock::iterator InsPt = MBB.begin();
+
+  // If we have instructions that aren't prolog instructions, split the block
+  // and emit a terminator instruction. This ensures correct spill placement.
+  // FIXME: We should unconditionally split the block here.
+  bool NeedBlockSplit = false;
+  Register DataReg = MI.getOperand(0).getReg();
+  for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
+       I != E; ++I) {
+    if (I->modifiesRegister(DataReg, TRI)) {
+      NeedBlockSplit = true;
+      break;
+    }
+  }
+
+  unsigned Opcode = OrOpc;
+  MachineBasicBlock *SplitBB = &MBB;
+  if (NeedBlockSplit) {
+    SplitBB = splitBlock(MI, &MBB, LIS);
+    Opcode = OrTermrOpc;
+    InsPt = MI;
+  }
+
+  MachineInstr *NewMI =
+    BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
+    .addReg(Exec)
+    .add(MI.getOperand(0));
 
   LoweredEndCf.insert(NewMI);
 
@@ -522,6 +580,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
 
   if (LIS)
     LIS->handleMove(*NewMI);
+  return SplitBB;
 }
 
 // Returns replace operands for a logical operation, either single result
@@ -608,11 +667,13 @@ void SILowerControlFlow::optimizeEndCf() {
   }
 }
 
-void SILowerControlFlow::process(MachineInstr &MI) {
+MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   MachineBasicBlock::iterator I(MI);
   MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
 
+  MachineBasicBlock *SplitBB = &MBB;
+
   switch (MI.getOpcode()) {
   case AMDGPU::SI_IF:
     emitIf(MI);
@@ -631,7 +692,7 @@ void SILowerControlFlow::process(MachineInstr &MI) {
     break;
 
   case AMDGPU::SI_END_CF:
-    emitEndCf(MI);
+    SplitBB = emitEndCf(MI);
     break;
 
   default:
@@ -656,6 +717,8 @@ void SILowerControlFlow::process(MachineInstr &MI) {
       break;
     }
   }
+
+  return SplitBB;
 }
 
 bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
@@ -718,6 +781,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     MovTermOpc = AMDGPU::S_MOV_B32_term;
     Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
     XorTermrOpc = AMDGPU::S_XOR_B32_term;
+    OrTermrOpc = AMDGPU::S_OR_B32_term;
     OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
     Exec = AMDGPU::EXEC_LO;
   } else {
@@ -727,6 +791,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     MovTermOpc = AMDGPU::S_MOV_B64_term;
     Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
     XorTermrOpc = AMDGPU::S_XOR_B64_term;
+    OrTermrOpc = AMDGPU::S_OR_B64_term;
     OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
     Exec = AMDGPU::EXEC;
   }
@@ -734,19 +799,21 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   SmallVector<MachineInstr *, 32> Worklist;
 
   MachineFunction::iterator NextBB;
-  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
-       BI != BE; BI = NextBB) {
+  for (MachineFunction::iterator BI = MF.begin();
+       BI != MF.end(); BI = NextBB) {
     NextBB = std::next(BI);
-    MachineBasicBlock &MBB = *BI;
+    MachineBasicBlock *MBB = &*BI;
 
-    MachineBasicBlock::iterator I, Next;
-    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+    MachineBasicBlock::iterator I, E, Next;
+    E = MBB->end();
+    for (I = MBB->begin(); I != E; I = Next) {
       Next = std::next(I);
       MachineInstr &MI = *I;
+      MachineBasicBlock *SplitMBB = MBB;
 
       switch (MI.getOpcode()) {
       case AMDGPU::SI_IF:
-        process(MI);
+        SplitMBB = process(MI);
         break;
 
       case AMDGPU::SI_ELSE:
@@ -757,12 +824,17 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
         if (InsertKillCleanups)
           Worklist.push_back(&MI);
         else
-          process(MI);
+          SplitMBB = process(MI);
         break;
 
       default:
         break;
       }
+
+      if (SplitMBB != MBB) {
+        MBB = Next->getParent();
+        E = MBB->end();
+      }
     }
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 449b6287a87b..0dd6c09a958c 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -196,6 +196,12 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
     MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
     return true;
   }
+  case AMDGPU::S_OR_B64_term: {
+    // This is only a terminator to get the correct spill code placement during
+    // register allocation.
+    MI.setDesc(TII.get(AMDGPU::S_OR_B64));
+    return true;
+  }
   case AMDGPU::S_OR_B32_term: {
     // This is only a terminator to get the correct spill code placement during
     // register allocation.

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
index faea2df6b517..fbcf53a36970 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-other-terminators.mir
@@ -205,9 +205,11 @@ body:             |
   ; CHECK:   dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
   ; CHECK:   [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
   ; CHECK: bb.2:
-  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   successors: %bb.3(0x80000000)
   ; CHECK:   [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
-  ; CHECK:   $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
+  ; CHECK:   $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; CHECK:   S_SLEEP 1
   ; CHECK:   [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
   ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc

diff  --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
index b360f3aa5ffb..cc86f5b267bb 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
@@ -1,18 +1,17 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=liveintervals,si-lower-control-flow,si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
 
 # Check that assert is not triggered
 
-...
 ---
 name: si-lower-control-flow
 body: |
   bb.0:
     ; GCN-LABEL: name: si-lower-control-flow
     ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-    ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0
+    ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0
     ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc
-    ; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
+    ; GCN: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
     ; GCN: S_ENDPGM 0
     %0:sgpr_64 = COPY $sgpr4_sgpr5
     %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
@@ -51,3 +50,324 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+# We need to split the block for SI_END_CF, but it is already at the block end, so no new block is created.
+---
+name: end_cf_split_block_end
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_end
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:sreg_64_xexec = COPY %5
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name: end_cf_split_block_physreg_livein
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_physreg_livein
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
+  ; GCN:   S_NOP 0
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5
+  ; GCN:   S_SLEEP 3
+  ; GCN:   S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+    liveins: $vgpr0, $sgpr4_sgpr5
+
+    %6:sreg_64_xexec = COPY %5
+    S_NOP 0
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_SLEEP 3
+    S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name: end_cf_split_block_physreg_livein_liveout
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_physreg_livein_liveout
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9
+  ; GCN:   S_SLEEP 3
+  ; GCN:   S_NOP 0
+  ; GCN: bb.2:
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
+  ; GCN:   S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
+
+    %6:sreg_64_xexec = COPY %5
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_SLEEP 3
+    S_NOP 0
+
+  bb.2:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
+    S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+
+...
+
+---
+name: end_cf_split_block_physreg_liveout
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_physreg_liveout
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   liveins: $vgpr3
+  ; GCN:   $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   $sgpr4_sgpr5 = S_MOV_B64 32
+  ; GCN: bb.2:
+  ; GCN:   liveins: $vgpr3, $sgpr4_sgpr5
+  ; GCN:   S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+
+    %6:sreg_64_xexec = COPY %5
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
+    $sgpr4_sgpr5 = S_MOV_B64 32
+
+  bb.2:
+    liveins: $vgpr3, $sgpr4_sgpr5
+    S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
+
+...
+
+---
+name: end_cf_split_block_physreg_live_across_split
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_physreg_live_across_split
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
+  ; GCN:   $sgpr4_sgpr5 = S_MOV_B64 32
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5
+  ; GCN:   S_SLEEP 3, implicit $sgpr4_sgpr5
+  ; GCN:   S_NOP 0
+  ; GCN: bb.2:
+  ; GCN:   liveins: $vgpr0, $sgpr4_sgpr5
+  ; GCN:   S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2
+    liveins: $vgpr0, $sgpr4_sgpr5
+
+    %6:sreg_64_xexec = COPY %5
+    $sgpr4_sgpr5 = S_MOV_B64 32
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_SLEEP 3, implicit $sgpr4_sgpr5
+    S_NOP 0
+
+  bb.2:
+    liveins: $vgpr0, $sgpr4_sgpr5
+    S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
+
+...
+
+---
+name: end_cf_split_block_process_next_inst
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: end_cf_split_block_process_next_inst
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec
+  ; GCN:   dead %5:sreg_64_xexec = S_MOV_B64 0
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x80000000)
+  ; GCN:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]]
+  ; GCN:   $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; GCN:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GCN: bb.2:
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %2:vgpr_32 = COPY killed $vgpr2
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, killed %1, implicit $exec
+    %4:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %2, implicit $exec
+    %5:sreg_64_xexec = S_MOV_B64 0
+
+  bb.1:
+    successors: %bb.2
+
+    %6:sreg_64_xexec = COPY %3
+    SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %7:sreg_64_xexec = SI_IF %4, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %8:sreg_64_xexec = S_MOV_B64_term %7, implicit $exec
+
+  bb.2:
+    S_ENDPGM 0
+
+...

More information about the llvm-commits mailing list