[llvm] dd0caa8 - [AMDGPU] Fix liveness in SIOptimizeExecMaskingPreRA.cpp

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 5 12:22:30 PST 2023


Author: Stanislav Mekhanoshin
Date: 2023-02-05T12:21:28-08:00
New Revision: dd0caa82de593f080469c772b5b092e1bf7f7cc0

URL: https://github.com/llvm/llvm-project/commit/dd0caa82de593f080469c772b5b092e1bf7f7cc0
DIFF: https://github.com/llvm/llvm-project/commit/dd0caa82de593f080469c772b5b092e1bf7f7cc0.diff

LOG: [AMDGPU] Fix liveness in SIOptimizeExecMaskingPreRA.cpp

If the condition register def occurs after the newly created use,
we do not properly update LIS. There are two problems (sketched
below):

1) We do not extend the defining segment to the end of its block,
   marking the value live-out (this is a regression after
   https://reviews.llvm.org/rG09d38dd7704a52e8ad2d5f8f39aaeccf107f4c56).

2) We do not extend the use segment to the beginning of the use
   block, marking the value live-in.

Fixes: SWDEV-379563

Differential Revision: https://reviews.llvm.org/D143302

Added: 
    llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 85de3a5484111..1cdd2ae2204e9 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -226,16 +226,23 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
       auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
       assert(DefSegment != SelLI->end() &&
              "No live interval segment covering definition?");
-      for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) {
+      for (auto I = DefSegment; I != SelLI->end(); ++I) {
         SlotIndex Start = I->start < SelIdx.getRegSlot() ?
                           SelIdx.getRegSlot() : I->start;
         SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
                         I->end : AndIdx.getRegSlot();
-        Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+        if (Start < End)
+          Dst.addSegment(LiveRange::Segment(Start, End, VNI));
       }
-      // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
       if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
-        Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+        // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+        Dst.addSegment(
+            LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
+      else if (!Dst.liveAt(AndIdx))
+        // This is live-in, so extend segment to the beginning of the block.
+        Dst.addSegment(LiveRange::Segment(
+            LIS->getSlotIndexes()->getMBBStartIdx(Andn2->getParent()),
+            AndIdx.getRegSlot(), VNI));
     };
 
     LiveInterval &CCLI = LIS->getInterval(CCReg);

diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir
new file mode 100644
index 0000000000000..6658171c31035
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-def-after-use.mir
@@ -0,0 +1,113 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx1030 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra,si-optimize-exec-masking-pre-ra %s -o - | FileCheck --check-prefix=GCN %s
+
+# FIXME: The second run of the pass is a workaround for a bug in
+# -run-pass: the verifier doesn't detect broken LiveIntervals, see
+# bug 46873.
+
+# %8 is defined at the end, but it is used in bb.2.
+# Make sure we properly extend its live range to the beginning of bb.2
+# and to the end of bb.6.
+
+---
+name: def_later_than_use
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: def_later_than_use
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT undef %1:vreg_64, 0, 0, implicit $exec
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.6(0x04000000), %bb.1(0x7c000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[DEF]], implicit-def dead $scc
+  ; GCN-NEXT:   $vcc_lo = COPY [[S_AND_B32_]]
+  ; GCN-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit $vcc
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   successors: %bb.3(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, %8, implicit-def dead $scc
+  ; GCN-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+  ; GCN-NEXT:   S_BRANCH %bb.3
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT:   successors: %bb.4(0x50000000), %bb.5(0x30000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %6, implicit-def $scc
+  ; GCN-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+  ; GCN-NEXT:   $vcc_lo = S_AND_B32 $exec_lo, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; GCN-NEXT:   S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+  ; GCN-NEXT:   S_BRANCH %bb.4
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.4:
+  ; GCN-NEXT:   successors: %bb.5(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.5:
+  ; GCN-NEXT:   S_ENDPGM 0
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.6:
+  ; GCN-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CMP_NEQ_F16_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NEQ_F16_e64 0, 0, 0, [[GLOBAL_LOAD_USHORT]], 0, implicit $mode, implicit $exec
+  ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_NEQ_F16_e64_]], implicit $exec
+  ; GCN-NEXT:   [[S_CSELECT_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit undef $scc
+  ; GCN-NEXT:   [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 $exec_lo, [[S_CSELECT_B32_]], implicit-def dead $scc
+  ; GCN-NEXT:   $exec_lo = S_MOV_B32_term [[S_AND_B32_1]]
+  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:vgpr_32 = GLOBAL_LOAD_USHORT undef %1:vreg_64, 0, 0, implicit $exec
+    %2:sreg_32_xm0_xexec = IMPLICIT_DEF
+
+  bb.1:
+    successors: %bb.6(0x04000000), %bb.1(0x7c000000)
+
+    %3:sreg_32 = S_AND_B32 $exec_lo, %2, implicit-def dead $scc
+    $vcc_lo = COPY %3
+    S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    successors: %bb.4(0x50000000), %bb.5(0x30000000)
+
+    $exec_lo = S_OR_B32 $exec_lo, %6, implicit-def $scc
+    %7:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %5, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %7, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.5, implicit killed $vcc
+    S_BRANCH %bb.4
+
+  bb.4:
+    successors: %bb.5(0x80000000)
+
+  bb.5:
+    S_ENDPGM 0
+
+  bb.6:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+    %8:sreg_32_xm0_xexec = V_CMP_NEQ_F16_e64 0, 0, 0, %0, 0, implicit $mode, implicit $exec
+    %5:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %8, implicit $exec
+    %9:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit undef $scc
+    %10:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
+    %6:sreg_32 = S_AND_B32 %10, %9, implicit-def dead $scc
+    $exec_lo = S_MOV_B32_term %6
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+    S_BRANCH %bb.2
+
+...
