[llvm] 8bc5e7a - [AMDGPU] Additional liveness tests for si-optimize-exec-masking-pre-ra
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 5 23:05:57 PDT 2022
Author: Carl Ritson
Date: 2022-07-06T15:05:32+09:00
New Revision: 8bc5e7ac519f04f83ca5e73322778ea0d5baa91f
URL: https://github.com/llvm/llvm-project/commit/8bc5e7ac519f04f83ca5e73322778ea0d5baa91f
DIFF: https://github.com/llvm/llvm-project/commit/8bc5e7ac519f04f83ca5e73322778ea0d5baa91f.diff
LOG: [AMDGPU] Additional liveness tests for si-optimize-exec-masking-pre-ra
Merge tests and fixes from D128110 and D128315 on top of the
already-committed D128800.
Original author: arsenm
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D128882
Added:
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index e5e65a8dbbf1..57dbad468de8 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -159,6 +159,9 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
return false;
Register SelReg = Op1->getReg();
+ if (SelReg.isPhysical())
+ return false;
+
auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return false;
@@ -264,13 +267,11 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
// Try to remove v_cndmask_b32.
if (SelLI) {
- bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
- if (!CanRemoveSel) {
- // Try to shrink the live interval and check for dead def instead.
- LIS->shrinkToUses(SelLI, nullptr);
- CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
- }
- if (CanRemoveSel) {
+ // Kill status must be checked before shrinking the live range.
+ bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+ LIS->shrinkToUses(SelLI);
+ bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+ if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
index 9df269e66ba6..1403f9bd1cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -272,6 +272,7 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
; CHECK-NEXT: S_BRANCH %bb.3
@@ -279,7 +280,7 @@ body: |
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_NOP 0, implicit undef %1:vgpr_32
+ ; CHECK-NEXT: S_NOP 0, implicit undef %1
; CHECK-NEXT: S_BRANCH %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
@@ -473,3 +474,215 @@ body: |
bb.4:
...
+
+# We would need to extend the live range of %0 to be live out of %bb.2
+
+---
+name: register_not_marked_liveout
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ returnsVoid: true
+body: |
+ ; CHECK-LABEL: name: register_not_marked_liveout
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sreg_64_xexec = S_MOV_B64 0
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+ $exec = S_MOV_B64_term %2
+
+ bb.1:
+ $exec = S_MOV_B64_term %2
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+
+ bb.2:
+ %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+
+ bb.3:
+
+...
+
+# Can't delete V_CNDMASK_B32 when folding into the use in %bb.3 since
+# it's also used in %bb.0
+---
+name: cndmask_multiple_uses
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_multiple_uses
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+
+ bb.2:
+ $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+ bb.3:
+ %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+ $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+ bb.5:
+
+ bb.6:
+
+...
+
+# The live segment of %1 from V_CMP_GT_I32 needs to be shrunk after the use in %bb.1 is deleted
+---
+name: leftover_use_of_selreg_extends_liverange
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+ $exec = S_MOV_B64_term %2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+ bb.2:
+
+ bb.3:
+
+...
+
+---
+name: leftover_use_of_selreg_extends_liverange_subrange
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange_subrange
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+ ; CHECK-NEXT: %1.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit %1.sub0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ bb.0:
+ liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %1.sub0 = V_MOV_B32_e32 123, implicit $exec
+ %2:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+ $exec = S_MOV_B64_term %2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1.sub1, implicit $exec
+ $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+ bb.2:
+ S_NOP 0, implicit %1.sub0
+
+ bb.3:
+
+...
More information about the llvm-commits
mailing list