[llvm] 1d8a94c - [AMDGPU] SILowerControlFlow: fix preservation of LiveIntervals
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 10 21:47:07 PDT 2023
Author: Carl Ritson
Date: 2023-09-11T13:46:28+09:00
New Revision: 1d8a94c4ffe758c4e9e81e63309d4a52a4db4356
URL: https://github.com/llvm/llvm-project/commit/1d8a94c4ffe758c4e9e81e63309d4a52a4db4356
DIFF: https://github.com/llvm/llvm-project/commit/1d8a94c4ffe758c4e9e81e63309d4a52a4db4356.diff
LOG: [AMDGPU] SILowerControlFlow: fix preservation of LiveIntervals
In emitElse, the live interval for the SI_ELSE source must be recalculated,
as SI_ELSE is removed and its new user is placed at the block start.
In emitIfBreak, the live interval for the newly created AndReg must be
computed.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D158141
Added:
llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir
Modified:
llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 00cb5b2878f419c..a173adb6c58b59c 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -79,6 +79,7 @@ class SILowerControlFlow : public MachineFunctionPass {
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
SmallSet<MachineBasicBlock *, 4> KillBlocks;
+ SmallSet<Register, 8> RecomputeRegs;
const TargetRegisterClass *BoolRC = nullptr;
unsigned AndOpc;
@@ -297,8 +298,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// FIXME: Is there a better way of adjusting the liveness? It shouldn't be
// hard to add another def here but I'm not sure how to correctly update the
// valno.
- LIS->removeInterval(SaveExecReg);
- LIS->createAndComputeVirtRegInterval(SaveExecReg);
+ RecomputeRegs.insert(SaveExecReg);
LIS->createAndComputeVirtRegInterval(Tmp);
if (!SimpleIf)
LIS->createAndComputeVirtRegInterval(CopyReg);
@@ -309,6 +309,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
MachineBasicBlock::iterator Start = MBB.begin();
@@ -319,7 +320,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC
if (LV)
- LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
+ LV->replaceKillInstruction(SrcReg, MI, *OrSaveExec);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -331,9 +332,6 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(Exec)
.addReg(SaveReg);
- if (LIS)
- LIS->InsertMachineInstrInMaps(*And);
-
MachineInstr *Xor =
BuildMI(MBB, ElsePt, DL, TII->get(XorTermrOpc), Exec)
.addReg(Exec)
@@ -356,12 +354,13 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MI.eraseFromParent();
LIS->InsertMachineInstrInMaps(*OrSaveExec);
+ LIS->InsertMachineInstrInMaps(*And);
LIS->InsertMachineInstrInMaps(*Xor);
LIS->InsertMachineInstrInMaps(*Branch);
- LIS->removeInterval(DstReg);
- LIS->createAndComputeVirtRegInterval(DstReg);
+ RecomputeRegs.insert(SrcReg);
+ RecomputeRegs.insert(DstReg);
LIS->createAndComputeVirtRegInterval(SaveReg);
// Let this be recomputed.
@@ -388,8 +387,9 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
// AND the break condition operand with exec, then OR that into the "loop
// exit" mask.
MachineInstr *And = nullptr, *Or = nullptr;
+ Register AndReg;
if (!SkipAnding) {
- Register AndReg = MRI->createVirtualRegister(BoolRC);
+ AndReg = MRI->createVirtualRegister(BoolRC);
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
@@ -398,8 +398,6 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(AndReg)
.add(MI.getOperand(2));
- if (LIS)
- LIS->createAndComputeVirtRegInterval(AndReg);
} else {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
@@ -411,9 +409,13 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
if (LIS) {
- if (And)
- LIS->InsertMachineInstrInMaps(*And);
LIS->ReplaceMachineInstrInMaps(MI, *Or);
+ if (And) {
+ // Read of original operand 1 is on And now not Or.
+ RecomputeRegs.insert(And->getOperand(2).getReg());
+ LIS->InsertMachineInstrInMaps(*And);
+ LIS->createAndComputeVirtRegInterval(AndReg);
+ }
}
MI.eraseFromParent();
@@ -436,6 +438,7 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
.add(MI.getOperand(1));
if (LIS) {
+ RecomputeRegs.insert(MI.getOperand(0).getReg());
LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
LIS->InsertMachineInstrInMaps(*Branch);
}
@@ -714,11 +717,13 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
if (MI.getOpcode() == AMDGPU::SI_INIT_EXEC) {
// This should be before all vector instructions.
- BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
+ MachineInstr *InitMI = BuildMI(*MBB, MBB->begin(), MI.getDebugLoc(),
TII->get(IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64), Exec)
.addImm(MI.getOperand(0).getImm());
- if (LIS)
+ if (LIS) {
LIS->RemoveMachineInstrFromMaps(MI);
+ LIS->InsertMachineInstrInMaps(*InitMI);
+ }
MI.eraseFromParent();
return;
}
@@ -789,8 +794,7 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
LIS->InsertMachineInstrInMaps(*CmpMI);
LIS->InsertMachineInstrInMaps(*CmovMI);
- LIS->removeInterval(InputReg);
- LIS->createAndComputeVirtRegInterval(InputReg);
+ RecomputeRegs.insert(InputReg);
LIS->createAndComputeVirtRegInterval(CountReg);
}
@@ -807,7 +811,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
while (!MBB.predecessors().empty()) {
MachineBasicBlock *P = *MBB.pred_begin();
- if (P->getFallThrough() == &MBB)
+ if (P->getFallThrough(false) == &MBB)
FallThrough = P;
P->ReplaceUsesOfBlockWith(&MBB, Succ);
}
@@ -828,14 +832,13 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
MBB.clear();
MBB.eraseFromParent();
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
- if (!Succ->canFallThrough()) {
- MachineFunction *MF = FallThrough->getParent();
- MachineFunction::iterator FallThroughPos(FallThrough);
- MF->splice(std::next(FallThroughPos), Succ);
- } else
- BuildMI(*FallThrough, FallThrough->end(),
- FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(Succ);
+ // Note: we cannot update block layout and preserve live intervals;
+ // hence we must insert a branch.
+ MachineInstr *BranchMI = BuildMI(*FallThrough, FallThrough->end(),
+ FallThrough->findBranchDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(Succ);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*BranchMI);
}
return true;
@@ -947,6 +950,14 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
optimizeEndCf();
+ if (LIS) {
+ for (Register Reg : RecomputeRegs) {
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
+
+ RecomputeRegs.clear();
LoweredEndCf.clear();
LoweredIf.clear();
KillBlocks.clear();
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
index a8b97c7932580d5..353697013a91951 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
@@ -446,15 +446,16 @@ body: |
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.5(0x80000000)
; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: S_BRANCH %bb.5
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.4:
+ ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
+ ; GCN-NEXT: S_ENDPGM 0
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.5:
; GCN-NEXT: successors: %bb.4(0x80000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_BRANCH %bb.4
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.4:
- ; GCN-NEXT: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
- ; GCN-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1, %bb.4
@@ -923,7 +924,6 @@ body: |
S_BRANCH %bb.1
bb.1:
- ; predecessors: %bb.0
successors: %bb.2, %bb.6
%3:vgpr_32 = IMPLICIT_DEF
@@ -932,7 +932,6 @@ body: |
S_BRANCH %bb.2
bb.2:
- ; predecessors: %bb.1
successors: %bb.3, %bb.7
%6:vgpr_32 = IMPLICIT_DEF
@@ -941,7 +940,6 @@ body: |
S_BRANCH %bb.3
bb.3:
- ; predecessors: %bb.2
successors: %bb.4, %bb.5
%9:vgpr_32 = IMPLICIT_DEF
@@ -950,40 +948,34 @@ body: |
S_BRANCH %bb.4
bb.4:
- ; predecessors: %bb.3
successors: %bb.5
S_BRANCH %bb.5
bb.5:
- ; predecessors: %bb.3, %bb.4
successors: %bb.7
SI_END_CF %11:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.7
bb.6:
- ; predecessors: %bb.1, %bb.13
successors: %bb.14
SI_END_CF %5:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.14
bb.7:
- ; predecessors: %bb2, %bb.5
successors: %bb.8
SI_END_CF %8:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.8
bb.8:
- ; predecessors: %bb.7
successors: %bb.9
S_BRANCH %bb.9
bb.9:
- ; predecessors: %bb.8
successors: %bb.11, %bb.12
%12:vgpr_32 = IMPLICIT_DEF
@@ -992,33 +984,28 @@ body: |
S_BRANCH %bb.11
bb.10:
- ; predecessors: %bb.12
successors: %bb.13
S_BRANCH %bb.13
bb.11:
- ; predecessors: %bb.9
successors: %bb.12
S_BRANCH %bb.12
bb.12:
- ; predecessors: %bb.9, %bb.11
successors: %bb.10, %bb.13
%15:sreg_64 = SI_ELSE %14:sreg_64, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.10
bb.13:
- ; predecessors: %bb.10, %bb.12
successors: %bb.6
SI_END_CF %15:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.6
bb.14:
- ; predecessors: %bb.0, %bb.6
SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir
new file mode 100644
index 000000000000000..f6233ab45c9f822
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-control-flow-live-intervals.mir
@@ -0,0 +1,334 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -run-pass=liveintervals -run-pass=si-lower-control-flow -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs -o - %s | FileCheck %s
+
+# Check that verifier passes for the following.
+
+# Caused: Live segment doesn't end at a valid instruction
+---
+name: _amdgpu_cs_main1
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: _amdgpu_cs_main1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_]], [[COPY1]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, %3, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 [[S_XOR_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_OR_SAVEEXEC_B32_]], implicit-def $scc
+ ; CHECK-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_B32_1]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ bb.0:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ liveins: $vgpr0
+
+ %2:vgpr_32 = COPY killed $vgpr0
+ %6:sreg_32 = V_CMP_NE_U32_e64 0, killed %2, implicit $exec
+ %0:sreg_32 = SI_IF killed %6, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+
+ bb.3:
+ successors: %bb.4(0x40000000), %bb.1(0x40000000)
+
+ %1:sreg_32 = SI_ELSE killed %0, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.4:
+ successors: %bb.1(0x80000000)
+
+ S_BRANCH %bb.1
+
+...
+
+# Caused: Assertion `itr != mi2iMap.end() && "Instruction not in maps."' failed.
+---
+name: _amdgpu_cs_main2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: _amdgpu_cs_main2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_GT_I32_e64 1, [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[V_CMP_GT_I32_e64_]], implicit-def $scc
+ ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_AND_B32_]], [[COPY2]], implicit-def $scc
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_OR_B32_]]
+ ; CHECK-NEXT: $exec_lo = S_ANDN2_B32_term $exec_lo, [[S_OR_B32_]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_OR_B32_]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0
+
+ %4:vgpr_32 = COPY killed $vgpr0
+ %8:sreg_32 = V_CMP_GT_I32_e64 1, killed %4, implicit $exec
+ %6:sreg_32 = S_MOV_B32 0
+ %10:sreg_32 = COPY killed %6
+
+ bb.1:
+ successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+ %1:sreg_32 = COPY killed %10
+ %2:sreg_32 = SI_IF_BREAK %8, killed %1, implicit-def dead $scc
+ %10:sreg_32 = COPY %2
+ SI_LOOP %2, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ SI_END_CF killed %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# Caused: Live range continues after kill flag
+---
+name: _amdgpu_cs_main3
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: _amdgpu_cs_main3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_CMP_NGT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NGT_F32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, [[COPY]], 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[V_CMP_NLT_F32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.4(0x40000000)
+ liveins: $vgpr0
+
+ %2:vgpr_32 = COPY killed $vgpr0
+ %5:sreg_32 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, %2, 0, implicit $mode, implicit $exec
+ %0:sreg_32 = SI_IF killed %5, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %7:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, 0, 0, killed %2, 0, implicit $mode, implicit $exec
+ %1:sreg_32 = SI_IF killed %7, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ SI_END_CF killed %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.4:
+ SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# Caused: Live range continues after dead def flag
+---
+name: _amdgpu_cs_main4
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: _amdgpu_cs_main4
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $exec_lo
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]]
+ ; CHECK-NEXT: S_BRANCH %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE %9, %subreg.sub0, %9, %subreg.sub1, %9, %subreg.sub2, %9, %subreg.sub3
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %11
+ ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFSET [[COPY6]], [[REG_SEQUENCE]], 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: S_ENDPGM 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY1]], implicit-def $scc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x04000000), %bb.6(0x7c000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY5]]
+ ; CHECK-NEXT: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY8]]
+ ; CHECK-NEXT: [[V_READLANE_B32_:%[0-9]+]]:sreg_32 = V_READLANE_B32 [[COPY]], [[S_FF1_I32_B32_]]
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY7]], [[V_READLANE_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 1, [[S_FF1_I32_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[COPY8]], [[S_LSHL_B32_]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CMP_LG_U32 [[S_ANDN2_B32_]], 0, implicit-def $scc
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ADD_I32_]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[V_MBCNT_LO_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e64 [[COPY2]], 0, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 0, [[V_MBCNT_LO_U32_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY9]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: dead [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[S_AND_B32_1]], [[COPY9]], implicit-def dead $scc
+ ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ liveins: $vgpr0
+
+ %8:vgpr_32 = COPY killed $vgpr0
+ %10:sreg_32 = S_MOV_B32 0
+ %11:sreg_32 = V_CMP_NE_U32_e64 0, %8, implicit $exec
+ %0:sreg_32 = SI_IF killed %11, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.6(0x80000000)
+
+ %13:sreg_32 = COPY $exec_lo
+ %1:sreg_32 = COPY %13
+ %25:sreg_32 = COPY killed %10
+ %26:sreg_32 = COPY killed %1
+ S_BRANCH %bb.6
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+ %23:sgpr_128 = REG_SEQUENCE killed %19, %subreg.sub0, %19, %subreg.sub1, %19, %subreg.sub2, %19, %subreg.sub3
+ %24:vgpr_32 = COPY killed %4
+ BUFFER_ATOMIC_ADD_OFFSET killed %24, killed %23, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+ bb.3:
+ successors: %bb.5(0x80000000)
+
+ SI_END_CF killed %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.4:
+ S_ENDPGM 0
+
+ bb.5:
+ successors: %bb.4(0x80000000)
+
+ SI_END_CF killed %0, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.4
+
+ bb.6:
+ successors: %bb.7(0x04000000), %bb.6(0x7c000000)
+
+ %2:sreg_32 = COPY killed %25
+ %3:sreg_32 = COPY killed %26
+ %14:sreg_32 = S_FF1_I32_B32 %3
+ %15:sreg_32 = V_READLANE_B32 %8, %14
+ %4:sreg_32 = S_ADD_I32 killed %2, killed %15, implicit-def dead $scc
+ %17:sreg_32 = S_LSHL_B32 1, killed %14, implicit-def dead $scc
+ %5:sreg_32 = S_ANDN2_B32 killed %3, killed %17, implicit-def dead $scc
+ S_CMP_LG_U32 %5, 0, implicit-def $scc
+ %25:sreg_32 = COPY %4
+ %26:sreg_32 = COPY killed %5
+ S_CBRANCH_SCC1 %bb.6, implicit killed $scc
+ S_BRANCH %bb.7
+
+ bb.7:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %19:sreg_32 = S_MOV_B32 0
+ %20:vgpr_32 = V_MBCNT_LO_U32_B32_e64 killed %13, 0, implicit $exec
+ %21:sreg_32 = V_CMP_EQ_U32_e64 0, killed %20, implicit $exec
+ %7:sreg_32 = SI_IF killed %21, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+...
More information about the llvm-commits
mailing list