[llvm] dd0caa8 - [AMDGPU] Fix liveness in SIOptimizeExecMaskingPreRA.cpp
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 5 12:22:30 PST 2023
Author: Stanislav Mekhanoshin
Date: 2023-02-05T12:21:28-08:00
New Revision: dd0caa82de593f080469c772b5b092e1bf7f7cc0
URL: https://github.com/llvm/llvm-project/commit/dd0caa82de593f080469c772b5b092e1bf7f7cc0
DIFF: https://github.com/llvm/llvm-project/commit/dd0caa82de593f080469c772b5b092e1bf7f7cc0.diff
LOG: [AMDGPU] Fix liveness in SIOptimizeExecMaskingPreRA.cpp
If a condition register def happens past the newly created use, we do not
properly update LIS. There are two problems:
1) We do not extend the defining segment to the end of its block, marking
it live-out (this is a regression after
https://reviews.llvm.org/rG09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56).
2) We do not extend the use segment to the beginning of the use block,
marking it live-in.
Fixes: SWDEV-379563
Differential Revision: https://reviews.llvm.org/D143302
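For illustration, a minimal sketch of the liveness update the patch performs.
It reuses the names from the patch context (LIS, Dst, VNI, SelLI, SelIdx,
CmpIdx, AndIdx, Andn2, DefSegment) and assumes the surrounding
SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair code; it is a simplified
illustration, not the committed change:

  // Copy Sel's live segments into Dst, clamped to the new use (AndIdx),
  // except that a segment already ending at a block boundary keeps that
  // end so the value stays live-out of its defining block.
  for (auto I = DefSegment; I != SelLI->end(); ++I) {
    SlotIndex Start = std::max(I->start, SelIdx.getRegSlot());
    SlotIndex End = (I->end < AndIdx.getRegSlot() || I->end.isBlock())
                        ? I->end
                        : AndIdx.getRegSlot();
    if (Start < End)
      Dst.addSegment(LiveRange::Segment(Start, End, VNI));
  }

  if (!SelLI->getSegmentContaining(AndIdx.getRegSlot())) {
    // Sel was killed before the new use (e.g. by Cmp), so add a segment
    // that reaches the use.
    Dst.addSegment(
        LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
  } else if (!Dst.liveAt(AndIdx)) {
    // The def sits in a later block, so the value must be live-in to the
    // block of the new use: start the segment at the block entry rather
    // than at the def.
    Dst.addSegment(LiveRange::Segment(
        LIS->getSlotIndexes()->getMBBStartIdx(Andn2->getParent()),
        AndIdx.getRegSlot(), VNI));
  }

In the sketch, the loop no longer stops at AndIdx, which lets the defining
segment keep a block-boundary end point (problem 1, live-out), and the final
else-if starts a segment at the entry of the use's block (problem 2, live-in).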
Added:
llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 85de3a5484111..1cdd2ae2204e9 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -226,16 +226,23 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
assert(DefSegment != SelLI->end() &&
"No live interval segment covering definition?");
- for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) {
+ for (auto I = DefSegment; I != SelLI->end(); ++I) {
SlotIndex Start = I->start < SelIdx.getRegSlot() ?
SelIdx.getRegSlot() : I->start;
SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
I->end : AndIdx.getRegSlot();
- Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+ if (Start < End)
+ Dst.addSegment(LiveRange::Segment(Start, End, VNI));
}
- // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
- Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+ // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+ Dst.addSegment(
+ LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+ else if (!Dst.liveAt(AndIdx))
+ // This is live-in, so extend segment to the beginning of the block.
+ Dst.addSegment(LiveRange::Segment(
+ LIS->getSlotIndexes()->getMBBStartIdx(Andn2->getParent()),
+ AndIdx.getRegSlot(), VNI));
};
LiveInterval &CCLI = LIS->getInterval(CCReg);
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir
new file mode 100644
index 0000000000000..6658171c31035
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra,si-optimize-exec-masking-pre-ra %s -o - | FileCheck --check-prefix=GCN %s
+
+# FIXME: The second run of the pass is a workaround for a bug in
+# -run-pass: the verifier doesn't detect broken LiveIntervals, see bug
+# 46873
+
+# %8 is defined at the end, but it is used in bb.2.
+# Make sure we properly extend its live range to the beginning of bb.2
+# and to the end of bb.6.
+
+---
+name: def_later_than_use
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: def_later_than_use
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT undef %1:vreg_64, 0, 0, implicit $exec
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[DEF]], implicit-def dead $scc
+ ; GCN-NEXT: $vcc_lo = COPY [[S_AND_B32_]]
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit $vcc
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: successors: %bb.3(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %8, implicit-def dead $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+ ; GCN-NEXT: S_BRANCH %bb.3
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.3:
+ ; GCN-NEXT: successors: %bb.4(0x50000000), %bb.5(0x30000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $exec_lo = S_OR_B32 $exec_lo, %6, implicit-def $scc
+ ; GCN-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+ ; GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+ ; GCN-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+ ; GCN-NEXT: S_BRANCH %bb.4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.4:
+ ; GCN-NEXT: successors: %bb.5(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.5:
+ ; GCN-NEXT: S_ENDPGM 0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.6:
+ ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NEQ_F16_e64 0, 0, 0, [[GLOBAL_LOAD_USHORT]], 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NEQ_F16_e64_]], implicit $exec
+ ; GCN-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit undef $scc
+ ; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_CSELECT_B32_]], implicit-def dead $scc
+ ; GCN-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+ ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ bb.0:
+ successors: %bb.1(0x80000000)
+
+ %0:vgpr_32 = GLOBAL_LOAD_USHORT undef %1:vreg_64, 0, 0, implicit $exec
+ %2:sreg_32_xm0_xexec = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.6(0x04000000), %bb.1(0x7c000000)
+
+ %3:sreg_32 = S_AND_B32 $exec_lo, %2, implicit-def dead $scc
+ $vcc_lo = COPY %3
+ S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+ S_BRANCH %bb.1
+
+ bb.2:
+ successors: %bb.3(0x80000000)
+
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.4(0x50000000), %bb.5(0x30000000)
+
+ $exec_lo = S_OR_B32 $exec_lo, %6, implicit-def $scc
+ %7:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %7, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.5, implicit killed $vcc
+ S_BRANCH %bb.4
+
+ bb.4:
+ successors: %bb.5(0x80000000)
+
+ bb.5:
+ S_ENDPGM 0
+
+ bb.6:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %8:sreg_32_xm0_xexec = V_CMP_NEQ_F16_e64 0, 0, 0, %0, 0, implicit $mode, implicit $exec
+ %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %8, implicit $exec
+ %9:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit undef $scc
+ %10:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+ %6:sreg_32 = S_AND_B32 %10, %9, implicit-def dead $scc
+ $exec_lo = S_MOV_B32_term %6
+ S_CBRANCH_EXECZ %bb.3, implicit $exec
+ S_BRANCH %bb.2
+
+...