[llvm] 1d8a94c - [AMDGPU] SILowerControlFlow: fix preservation of LiveIntervals

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 10 21:47:07 PDT 2023


Author: Carl Ritson
Date: 2023-09-11T13:46:28+09:00
New Revision: 1d8a94c4ffe758c4e9e81e63309d4a52a4db4356

URL: https://github.com/llvm/llvm-project/commit/1d8a94c4ffe758c4e9e81e63309d4a52a4db4356
DIFF: https://github.com/llvm/llvm-project/commit/1d8a94c4ffe758c4e9e81e63309d4a52a4db4356.diff

LOG: [AMDGPU] SILowerControlFlow: fix preservation of LiveIntervals

In emitElse live interval for SI_ELSE source must be recalculated
as SI_ELSE is removed, and new user is placed at block start.
In emitIfBreak live interval for new created AndReg must be
computed.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D158141

Added: 
    llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir

Modified: 
    llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
    llvm/test/CodeGen/AMDGPU/collapse-endcf.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 00cb5b2878f419c..a173adb6c58b59c 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -79,6 +79,7 @@ class SILowerControlFlow : public MachineFunctionPass {
   SetVector<MachineInstr*> LoweredEndCf;
   DenseSet<Register> LoweredIf;
   SmallSet<MachineBasicBlock *, 4> KillBlocks;
+  SmallSet<Register, 8> RecomputeRegs;
 
   const TargetRegisterClass *BoolRC = nullptr;
   unsigned AndOpc;
@@ -297,8 +298,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
   // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
   // hard to add another def here but I'm not sure how to correctly update the
   // valno.
-  LIS->removeInterval(SaveExecReg);
-  LIS->createAndComputeVirtRegInterval(SaveExecReg);
+  RecomputeRegs.insert(SaveExecReg);
   LIS->createAndComputeVirtRegInterval(Tmp);
   if (!SimpleIf)
     LIS->createAndComputeVirtRegInterval(CopyReg);
@@ -309,6 +309,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   const DebugLoc &DL = MI.getDebugLoc();
 
   Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
 
   MachineBasicBlock::iterator Start = MBB.begin();
 
@@ -319,7 +320,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
     BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
     .add(MI.getOperand(1)); // Saved EXEC
   if (LV)
-    LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
+    LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
 
   MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
 
@@ -331,9 +332,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
                           .addReg(Exec)
                           .addReg(SaveReg);
 
-  if (LIS)
-    LIS->InsertMachineInstrInMaps(*And);
-
   MachineInstr *Xor =
     BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
     .addReg(Exec)
@@ -356,12 +354,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   MI.eraseFromParent();
 
   LIS->InsertMachineInstrInMaps(*OrSaveExec);
+  LIS->InsertMachineInstrInMaps(*And);
 
   LIS->InsertMachineInstrInMaps(*Xor);
   LIS->InsertMachineInstrInMaps(*Branch);
 
-  LIS->removeInterval(DstReg);
-  LIS->createAndComputeVirtRegInterval(DstReg);
+  RecomputeRegs.insert(SrcReg);
+  RecomputeRegs.insert(DstReg);
   LIS->createAndComputeVirtRegInterval(SaveReg);
 
   // Let this be recomputed.
@@ -388,8 +387,9 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
   // AND the break condition operand with exec, then OR that into the "loop
   // exit" mask.
   MachineInstr *And = nullptr, *Or = nullptr;
+  Register AndReg;
   if (!SkipAnding) {
-    Register AndReg = MRI->createVirtualRegister(BoolRC);
+    AndReg = MRI->createVirtualRegister(BoolRC);
     And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
              .addReg(Exec)
              .add(MI.getOperand(1));
@@ -398,8 +398,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
     Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
              .addReg(AndReg)
              .add(MI.getOperand(2));
-    if (LIS)
-      LIS->createAndComputeVirtRegInterval(AndReg);
   } else {
     Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
              .add(MI.getOperand(1))
@@ -411,9 +409,13 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
     LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
 
   if (LIS) {
-    if (And)
-      LIS->InsertMachineInstrInMaps(*And);
     LIS->ReplaceMachineInstrInMaps(MI, *Or);
+    if (And) {
+      // Read of original operand 1 is on And now not Or.
+      RecomputeRegs.insert(And->getOperand(2).getReg());
+      LIS->InsertMachineInstrInMaps(*And);
+      LIS->createAndComputeVirtRegInterval(AndReg);
+    }
   }
 
   MI.eraseFromParent();
@@ -436,6 +438,7 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
           .add(MI.getOperand(1));
 
   if (LIS) {
+    RecomputeRegs.insert(MI.getOperand(0).getReg());
     LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
     LIS->InsertMachineInstrInMaps(*Branch);
   }
@@ -714,11 +717,13 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
 
   if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
     // This should be before all vector instructions.
-    BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+    MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
             TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
         .addImm(MI.getOperand(0).getImm());
-    if (LIS)
+    if (LIS) {
       LIS->RemoveMachineInstrFromMaps(MI);
+      LIS->InsertMachineInstrInMaps(*InitMI);
+    }
     MI.eraseFromParent();
     return;
   }
@@ -789,8 +794,7 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
   LIS->InsertMachineInstrInMaps(*CmpMI);
   LIS->InsertMachineInstrInMaps(*CmovMI);
 
-  LIS->removeInterval(InputReg);
-  LIS->createAndComputeVirtRegInterval(InputReg);
+  RecomputeRegs.insert(InputReg);
   LIS->createAndComputeVirtRegInterval(CountReg);
 }
 
@@ -807,7 +811,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
 
   while (!MBB.predecessors().empty()) {
     MachineBasicBlock *P = *MBB.pred_begin();
-    if (P->getFallThrough() == &MBB)
+    if (P->getFallThrough(false) == &MBB)
       FallThrough = P;
     P->ReplaceUsesOfBlockWith(&MBB, Succ);
   }
@@ -828,14 +832,13 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
   MBB.clear();
   MBB.eraseFromParent();
   if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
-    if (!Succ->canFallThrough()) {
-      MachineFunction *MF = FallThrough->getParent();
-      MachineFunction::iterator FallThroughPos(FallThrough);
-      MF->splice(std::next(FallThroughPos), Succ);
-    } else
-      BuildMI(*FallThrough, FallThrough->end(),
-              FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
-          .addMBB(Succ);
+    // Note: we cannot update block layout and preserve live intervals;
+    // hence we must insert a branch.
+    MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
+            FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+        .addMBB(Succ);
+    if (LIS)
+      LIS->InsertMachineInstrInMaps(*BranchMI);
   }
 
   return true;
@@ -947,6 +950,14 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
 
   optimizeEndCf();
 
+  if (LIS) {
+    for (Register Reg : RecomputeRegs) {
+      LIS->removeInterval(Reg);
+      LIS->createAndComputeVirtRegInterval(Reg);
+    }
+  }
+
+  RecomputeRegs.clear();
   LoweredEndCf.clear();
   LoweredIf.clear();
   KillBlocks.clear();

diff  --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
index a8b97c7932580d5..353697013a91951 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
@@ -446,15 +446,16 @@ body:             |
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.5(0x80000000)
   ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_BRANCH %bb.5
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.4:
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
+  ; GCN-NEXT:   S_ENDPGM 0
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.5:
   ; GCN-NEXT:   successors: %bb.4(0x80000000)
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   S_BRANCH %bb.4
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.4:
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
-  ; GCN-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.1, %bb.4
 
@@ -923,7 +924,6 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
-  ; predecessors: %bb.0
     successors: %bb.2, %bb.6
 
     %3:vgpr_32 = IMPLICIT_DEF
@@ -932,7 +932,6 @@ body: |
     S_BRANCH %bb.2
 
   bb.2:
-  ; predecessors: %bb.1
     successors: %bb.3, %bb.7
 
     %6:vgpr_32 = IMPLICIT_DEF
@@ -941,7 +940,6 @@ body: |
     S_BRANCH %bb.3
 
   bb.3:
-  ; predecessors: %bb.2
     successors: %bb.4, %bb.5
 
     %9:vgpr_32 = IMPLICIT_DEF
@@ -950,40 +948,34 @@ body: |
     S_BRANCH %bb.4
 
   bb.4:
-  ; predecessors: %bb.3
     successors: %bb.5
 
     S_BRANCH %bb.5
 
   bb.5:
-  ; predecessors: %bb.3, %bb.4
     successors: %bb.7
 
     SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.7
 
   bb.6:
-  ; predecessors: %bb.1, %bb.13
     successors: %bb.14
 
     SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.14
 
   bb.7:
-  ; predecessors: %bb2, %bb.5
     successors: %bb.8
 
     SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.8
 
   bb.8:
-  ; predecessors: %bb.7
     successors: %bb.9
 
     S_BRANCH %bb.9
 
   bb.9:
-  ; predecessors: %bb.8
     successors: %bb.11, %bb.12
 
     %12:vgpr_32 = IMPLICIT_DEF
@@ -992,33 +984,28 @@ body: |
     S_BRANCH %bb.11
 
   bb.10:
-  ; predecessors: %bb.12
     successors: %bb.13
 
     S_BRANCH %bb.13
 
   bb.11:
-  ; predecessors: %bb.9
     successors: %bb.12
 
     S_BRANCH %bb.12
 
   bb.12:
-  ; predecessors: %bb.9, %bb.11
     successors: %bb.10, %bb.13
 
     %15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.10
 
   bb.13:
-  ; predecessors: %bb.10, %bb.12
     successors: %bb.6
 
     SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_BRANCH %bb.6
 
   bb.14:
-  ; predecessors: %bb.0, %bb.6
 
     SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
     S_ENDPGM 0

diff  --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir
new file mode 100644
index 000000000000000..f6233ab45c9f822
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir
@@ -0,0 +1,334 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -run-pass=liveintervals -run-pass=si-lower-control-flow -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs -o - %s | FileCheck %s
+
+# Check that verifier passes for the following.
+
+# Caused: Live segment doesn't end at a valid instruction
+---
+name:            _amdgpu_cs_main1
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: _amdgpu_cs_main1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY1]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %3, implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+  ; CHECK-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc
+  ; CHECK-NEXT:   $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  bb.0:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+    liveins: $vgpr0
+
+    %2:vgpr_32 = COPY killed $vgpr0
+    %6:sreg_32 = V_CMP_NE_U32_e64 0, killed %2, implicit $exec
+    %0:sreg_32 = SI_IF killed %6, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+
+  bb.3:
+    successors: %bb.4(0x40000000), %bb.1(0x40000000)
+
+    %1:sreg_32 = SI_ELSE killed %0, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.4:
+    successors: %bb.1(0x80000000)
+
+    S_BRANCH %bb.1
+
+...
+
+# Caused: Assertion `itr != mi2iMap.end() && "Instruction not in maps."' failed.
+---
+name:            _amdgpu_cs_main2
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: _amdgpu_cs_main2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 1, [[COPY]], implicit $exec
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[V_CMP_GT_I32_e64_]], implicit-def $scc
+  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_OR_B32_]]
+  ; CHECK-NEXT:   $exec_lo = S_ANDN2_B32_term $exec_lo, [[S_OR_B32_]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x80000000)
+    liveins: $vgpr0
+
+    %4:vgpr_32 = COPY killed $vgpr0
+    %8:sreg_32 = V_CMP_GT_I32_e64 1, killed %4, implicit $exec
+    %6:sreg_32 = S_MOV_B32 0
+    %10:sreg_32 = COPY killed %6
+
+  bb.1:
+    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+    %1:sreg_32 = COPY killed %10
+    %2:sreg_32 = SI_IF_BREAK %8, killed %1, implicit-def dead $scc
+    %10:sreg_32 = COPY %2
+    SI_LOOP %2, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    SI_END_CF killed %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# Caused: Live range continues after kill flag
+---
+name:            _amdgpu_cs_main3
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: _amdgpu_cs_main3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NGT_F32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+  ; CHECK-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[V_CMP_NLT_F32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.4(0x40000000)
+    liveins: $vgpr0
+
+    %2:vgpr_32 = COPY killed $vgpr0
+    %5:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, %2, 0, implicit $mode, implicit $exec
+    %0:sreg_32 = SI_IF killed %5, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+    %7:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, killed %2, 0, implicit $mode, implicit $exec
+    %1:sreg_32 = SI_IF killed %7, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+
+  bb.3:
+    successors: %bb.4(0x80000000)
+
+    SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+  bb.4:
+    SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# Caused: Live range continues after dead def flag
+---
+name:            _amdgpu_cs_main4
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: _amdgpu_cs_main4
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]]
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]]
+  ; CHECK-NEXT:   S_BRANCH %bb.6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE %9, %subreg.sub0, %9, %subreg.sub1, %9, %subreg.sub2, %9, %subreg.sub3
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY %11
+  ; CHECK-NEXT:   BUFFER_ATOMIC_ADD_OFFSET [[COPY6]], [[REG_SEQUENCE]], 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.7(0x04000000), %bb.6(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY8]]
+  ; CHECK-NEXT:   [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
+  ; CHECK-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY7]], [[V_READLANE_B32_]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 1, [[S_FF1_I32_B32_]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[COPY8]], [[S_LSHL_B32_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CMP_LG_U32 [[S_ANDN2_B32_]], 0, implicit-def $scc
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ADD_I32_]]
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY2]], 0, implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 0, [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+  ; CHECK-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   dead [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_1]], [[COPY9]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.5(0x40000000)
+    liveins: $vgpr0
+
+    %8:vgpr_32 = COPY killed $vgpr0
+    %10:sreg_32 = S_MOV_B32 0
+    %11:sreg_32 = V_CMP_NE_U32_e64 0, %8, implicit $exec
+    %0:sreg_32 = SI_IF killed %11, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.6(0x80000000)
+
+    %13:sreg_32 = COPY $exec_lo
+    %1:sreg_32 = COPY %13
+    %25:sreg_32 = COPY killed %10
+    %26:sreg_32 = COPY killed %1
+    S_BRANCH %bb.6
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+    %23:sgpr_128 = REG_SEQUENCE killed %19, %subreg.sub0, %19, %subreg.sub1, %19, %subreg.sub2, %19, %subreg.sub3
+    %24:vgpr_32 = COPY killed %4
+    BUFFER_ATOMIC_ADD_OFFSET killed %24, killed %23, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+  bb.3:
+    successors: %bb.5(0x80000000)
+
+    SI_END_CF killed %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.4:
+    S_ENDPGM 0
+
+  bb.5:
+    successors: %bb.4(0x80000000)
+
+    SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.6:
+    successors: %bb.7(0x04000000), %bb.6(0x7c000000)
+
+    %2:sreg_32 = COPY killed %25
+    %3:sreg_32 = COPY killed %26
+    %14:sreg_32 = S_FF1_I32_B32 %3
+    %15:sreg_32 = V_READLANE_B32 %8, %14
+    %4:sreg_32 = S_ADD_I32 killed %2, killed %15, implicit-def dead $scc
+    %17:sreg_32 = S_LSHL_B32 1, killed %14, implicit-def dead $scc
+    %5:sreg_32 = S_ANDN2_B32 killed %3, killed %17, implicit-def dead $scc
+    S_CMP_LG_U32 %5, 0, implicit-def $scc
+    %25:sreg_32 = COPY %4
+    %26:sreg_32 = COPY killed %5
+    S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+    S_BRANCH %bb.7
+
+  bb.7:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+    %19:sreg_32 = S_MOV_B32 0
+    %20:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed %13, 0, implicit $exec
+    %21:sreg_32 = V_CMP_EQ_U32_e64 0, killed %20, implicit $exec
+    %7:sreg_32 = SI_IF killed %21, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+...


        


More information about the llvm-commits mailing list