[llvm] 8bc5e7a - [AMDGPU] Additional liveness tests for si-optimize-exec-masking-pre-ra

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 5 23:05:57 PDT 2022


Author: Carl Ritson
Date: 2022-07-06T15:05:32+09:00
New Revision: 8bc5e7ac519f04f83ca5e73322778ea0d5baa91f

URL: https://github.com/llvm/llvm-project/commit/8bc5e7ac519f04f83ca5e73322778ea0d5baa91f
DIFF: https://github.com/llvm/llvm-project/commit/8bc5e7ac519f04f83ca5e73322778ea0d5baa91f.diff

LOG: [AMDGPU] Additional liveness tests for si-optimize-exec-masking-pre-ra

Merge tests and fixes from D128110 and D128315 on top of the already
committed D128800.

Original author: arsenm

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D128882

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
    llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index e5e65a8dbbf1..57dbad468de8 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -159,6 +159,9 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
     return false;
 
   Register SelReg = Op1->getReg();
+  if (SelReg.isPhysical())
+    return false;
+
   auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
     return false;
@@ -264,13 +267,11 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
 
     // Try to remove v_cndmask_b32.
     if (SelLI) {
-      bool CanRemoveSel = SelLI->Query(CmpIdx.getRegSlot()).isKill();
-      if (!CanRemoveSel) {
-        // Try to shrink the live interval and check for dead def instead.
-        LIS->shrinkToUses(SelLI, nullptr);
-        CanRemoveSel = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
-      }
-      if (CanRemoveSel) {
+      // Kill status must be checked before shrinking the live range.
+      bool IsKill = SelLI->Query(CmpIdx.getRegSlot()).isKill();
+      LIS->shrinkToUses(SelLI);
+      bool IsDead = SelLI->Query(SelIdx.getRegSlot()).isDeadDef();
+      if (MRI->use_nodbg_empty(SelReg) && (IsKill || IsDead)) {
         LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
 
         LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());

diff  --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
index 9df269e66ba6..1403f9bd1cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -272,6 +272,7 @@ body:             |
   ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
   ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.3
@@ -279,7 +280,7 @@ body:             |
   ; CHECK-NEXT: bb.3:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_NOP 0, implicit undef %1:vgpr_32
+  ; CHECK-NEXT:   S_NOP 0, implicit undef %1
   ; CHECK-NEXT:   S_BRANCH %bb.4
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
@@ -473,3 +474,215 @@ body:             |
 
   bb.4:
 ...
+
+# We would need to extend the live range of %0 to be live out of %bb.2
+
+---
+name:            register_not_marked_liveout
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+  returnsVoid:     true
+body:             |
+  ; CHECK-LABEL: name: register_not_marked_liveout
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = S_MOV_B64 0
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+    $exec = S_MOV_B64_term %2
+
+  bb.1:
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+
+  bb.2:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+
+  bb.3:
+
+...
+
+# Can't delete V_CNDMASK_B32 when folding into the use in %bb.3 since
+# it's also used in %bb.0
+---
+name:            cndmask_multiple_uses
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_multiple_uses
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.5, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
+
+  bb.2:
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+  bb.3:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc
+
+  bb.5:
+
+  bb.6:
+
+...
+
+# The live segment of %1 from V_CMP_GT_I32 needs to be shrunk after the use in %bb.1 is deleted
+---
+name:            leftover_use_of_selreg_extends_liverange
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1, implicit $exec
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.2:
+
+  bb.3:
+
+...
+
+---
+name:            leftover_use_of_selreg_extends_liverange_subrange
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: leftover_use_of_selreg_extends_liverange_subrange
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_GT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+  ; CHECK-NEXT:   %1.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[V_CMP_GT_I32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit %1.sub0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  bb.0:
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    undef %1.sub1:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %1.sub0 = V_MOV_B32_e32 123, implicit $exec
+    %2:sreg_64 = V_CMP_GT_I32_e64 0, %1.sub1, implicit $exec
+    $exec = S_MOV_B64_term %2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    %3:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1.sub1, implicit $exec
+    $vcc = S_AND_B64 $exec, %3, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.2:
+    S_NOP 0, implicit %1.sub0
+
+  bb.3:
+
+...


        


More information about the llvm-commits mailing list