[llvm] d0f6641 - [AMDGPU] Fix liveness for loops in si-optimize-exec-masking-pre-ra
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 29 23:29:42 PDT 2022
Author: Carl Ritson
Date: 2022-06-30T15:26:50+09:00
New Revision: d0f6641615755e3dd27bc71390e6ae866dfef9ec
URL: https://github.com/llvm/llvm-project/commit/d0f6641615755e3dd27bc71390e6ae866dfef9ec
DIFF: https://github.com/llvm/llvm-project/commit/d0f6641615755e3dd27bc71390e6ae866dfef9ec.diff
LOG: [AMDGPU] Fix liveness for loops in si-optimize-exec-masking-pre-ra
Follow-up to D127894: the new liveness update code needs to handle
the case where the S_ANDN2 input's live range must be extended through
loops when the V_CNDMASK_B32 has been hoisted.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D128800
Added:
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index aba262dfa693..e5e65a8dbbf1 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -206,40 +206,25 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
LiveInterval *CmpLI =
CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+ LiveInterval *SelLI =
+ SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
- // Try to remove compare. Cmp value should not used in between of cmp
- // and s_and_b64 if VCC or just unused if any other register.
- if ((CmpReg.isVirtual() && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
- (CmpReg == Register(CondReg) &&
- std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
- [&](const MachineInstr &MI) {
- return MI.readsRegister(CondReg, TRI);
- }))) {
- LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
- if (CmpLI)
- LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
- LIS->RemoveMachineInstrFromMaps(*Cmp);
- Cmp->eraseFromParent();
-
- LiveInterval *SelLI =
- SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
- // Try to remove v_cndmask_b32.
- if (SelLI && SelLI->Query(CmpIdx.getRegSlot()).isKill()) {
- LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
-
- if (SelLI)
- LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
- LIS->RemoveMachineInstrFromMaps(*Sel);
- Sel->eraseFromParent();
- }
- }
-
+ // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
+ // and their associated liveness information.
if (CCReg.isVirtual()) {
+ // Note: this ignores that SelLI might have multiple internal values
+ // or splits and simply extends the live range to cover all cases
+ // where the result of the v_cndmask_b32 was live (e.g. loops).
+ // This could yield worse register allocation in rare edge cases.
+ SlotIndex EndIdx = AndIdx.getRegSlot();
+ if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
+ EndIdx = SelLI->endIndex();
+
LiveInterval &CCLI = LIS->getInterval(CCReg);
auto CCQ = CCLI.Query(SelIdx.getRegSlot());
if (CCQ.valueIn()) {
CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
- AndIdx.getRegSlot(), CCQ.valueIn()));
+ EndIdx, CCQ.valueIn()));
}
if (CC->getSubReg()) {
@@ -251,7 +236,7 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
auto CCQS = SR.Query(SelIdx.getRegSlot());
if (CCQS.valueIn()) {
SR.addSegment(LiveRange::Segment(
- SelIdx.getRegSlot(), AndIdx.getRegSlot(), CCQS.valueIn()));
+ SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
}
},
*LIS->getSlotIndexes(), *TRI);
@@ -263,6 +248,38 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
} else
LIS->removeAllRegUnitsForPhysReg(CCReg);
+ // Try to remove compare. Cmp value should not used in between of cmp
+ // and s_and_b64 if VCC or just unused if any other register.
+ if ((CmpReg.isVirtual() && CmpLI && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
+ (CmpReg == Register(CondReg) &&
+ std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
+ [&](const MachineInstr &MI) {
+ return MI.readsRegister(CondReg, TRI);
+ }))) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
+ if (CmpLI)
+ LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
+ LIS->RemoveMachineInstrFromMaps(*Cmp);
+ Cmp->eraseFromParent();
+
+ // Try to remove v_cndmask_b32.
+ if (SelLI) {
+ bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ if (!CanRemoveSel) {
+ // Try to shrink the live interval and check for dead def instead.
+ LIS->shrinkToUses(SelLI, nullptr);
+ CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ }
+ if (CanRemoveSel) {
+ LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+
+ LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
+ LIS->RemoveMachineInstrFromMaps(*Sel);
+ Sel->eraseFromParent();
+ }
+ }
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
index 501f1654d7c8..9df269e66ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -355,3 +355,121 @@ body: |
bb.4:
...
+
+# Liveness of V_CNDMASK_B32 source (%0) must be extended through loop.
+
+---
+name: cndmask_loop_cndmask
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_loop_cndmask
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], -1, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.4, implicit $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF1]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ %1:sreg_32 = S_ADD_I32 %1, -1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.4, implicit $scc
+
+ bb.2:
+ %4:sreg_64_xexec = V_CMP_NE_U32_e64 1, %2, implicit $exec
+ $vcc = S_AND_B64 $exec, %4, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_BRANCH %bb.1
+
+ bb.4:
+...
+
+---
+name: cndmask_loop_cndmask_split
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_loop_cndmask_split
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF1]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_ADD_I32_]], -1, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.5, implicit $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ bb.0:
+ $vcc = IMPLICIT_DEF
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ %1:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ S_CBRANCH_VCCZ %bb.5, implicit $vcc
+ S_BRANCH %bb.1
+
+ bb.5:
+ S_BRANCH %bb.4
+
+ bb.1:
+ %4:sreg_64_xexec = V_CMP_NE_U32_e64 1, %2, implicit $exec
+ $vcc = S_AND_B64 $exec, %4, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+
+ bb.2:
+ %1:sreg_32 = S_ADD_I32 %1, -1, implicit-def $scc
+ S_CBRANCH_SCC0 %bb.4, implicit $scc
+
+ bb.3:
+ S_BRANCH %bb.1
+
+ bb.4:
+...
More information about the llvm-commits
mailing list