[llvm] b03d902 - AMDGPU: Fix invalid liveness after si-optimize-exec-masking-pre-ra

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 22 18:04:33 PDT 2022


Author: Matt Arsenault
Date: 2022-06-22T20:49:03-04:00
New Revision: b03d902b61099cc5bfe7d17bbca59fccc1fd064a

URL: https://github.com/llvm/llvm-project/commit/b03d902b61099cc5bfe7d17bbca59fccc1fd064a
DIFF: https://github.com/llvm/llvm-project/commit/b03d902b61099cc5bfe7d17bbca59fccc1fd064a.diff

LOG: AMDGPU: Fix invalid liveness after si-optimize-exec-masking-pre-ra

This was leaving behind a use at the deleted instruction, which would
make the verifier fail during register allocation.
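
For context, the heart of the fix is keeping LiveIntervals consistent while
instructions are deleted or rewritten. Below is a minimal sketch of the two
update patterns the patch relies on, reusing the same LiveIntervals calls as
the change itself (LIS, Reg, DefMI, OldMI and NewMI are illustrative names,
not taken from the patch):

  // Erase the def of a virtual register without leaving stale liveness,
  // assuming the defined value has no remaining users: drop the value
  // defined at DefMI from Reg's interval first, then remove the
  // instruction from the slot index maps and delete it.
  if (Reg.isVirtual()) {
    LiveInterval &LI = LIS->getInterval(Reg);
    LIS->removeVRegDefAt(LI, LIS->getInstructionIndex(DefMI).getRegSlot());
  }
  LIS->RemoveMachineInstrFromMaps(DefMI);
  DefMI.eraseFromParent();

  // Rewrite one instruction into another at the same position. Unlike a
  // remove-then-reinsert, ReplaceMachineInstrInMaps reuses the old slot
  // index, so live range segments ending at that index stay valid.
  SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(OldMI, NewMI);
  OldMI.eraseFromParent();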

Added: 
    llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
    llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 12e6969be34b6..aba262dfa693f 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -39,7 +39,7 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
   MCRegister CondReg;
   MCRegister ExecReg;
 
-  Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+  bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
   bool optimizeElseBranch(MachineBasicBlock &MBB);
 
 public:
@@ -90,8 +90,8 @@ static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
 static bool isDefBetween(const SIRegisterInfo &TRI,
                          LiveIntervals *LIS, Register Reg,
                          const MachineInstr &Sel, const MachineInstr &And) {
-  SlotIndex AndIdx = LIS->getInstructionIndex(And);
-  SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+  SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
+  SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();
 
   if (Reg.isVirtual())
     return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
@@ -119,21 +119,20 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
 // required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
 // lanes.
 //
-// Returns %cc register on success.
-Register
-SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
+// Returns true on success.
+bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
   auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
                            unsigned Opc = MI.getOpcode();
                            return Opc == AMDGPU::S_CBRANCH_VCCZ ||
                                   Opc == AMDGPU::S_CBRANCH_VCCNZ; });
   if (I == MBB.terminators().end())
-    return Register();
+    return false;
 
   auto *And =
       TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
   if (!And || And->getOpcode() != AndOpc ||
       !And->getOperand(1).isReg() || !And->getOperand(2).isReg())
-    return Register();
+    return false;
 
   MachineOperand *AndCC = &And->getOperand(1);
   Register CmpReg = AndCC->getReg();
@@ -143,49 +142,49 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
     CmpReg = AndCC->getReg();
     CmpSubReg = AndCC->getSubReg();
   } else if (And->getOperand(2).getReg() != Register(ExecReg)) {
-    return Register();
+    return false;
   }
 
   auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
   if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
                 Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
       Cmp->getParent() != And->getParent())
-    return Register();
+    return false;
 
   MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
   MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
   if (Op1->isImm() && Op2->isReg())
     std::swap(Op1, Op2);
   if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
-    return Register();
+    return false;
 
   Register SelReg = Op1->getReg();
   auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
   if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
-    return Register();
+    return false;
 
   if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
       TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
-    return Register();
+    return false;
 
   Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
   Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
   MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
   if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
       Op1->getImm() != 0 || Op2->getImm() != 1)
-    return Register();
+    return false;
 
   Register CCReg = CC->getReg();
 
   // If there was a def between the select and the and, we would need to move it
   // to fold this.
   if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
-    return Register();
+    return false;
 
+  // TODO: Guard against implicit def operands?
   LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                     << *And);
 
-  LIS->RemoveMachineInstrFromMaps(*And);
   MachineInstr *Andn2 =
       BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
               And->getOperand(0).getReg())
@@ -196,34 +195,75 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
   MachineOperand &Andn2SCC = Andn2->getOperand(3);
   assert(Andn2SCC.getReg() == AMDGPU::SCC);
   Andn2SCC.setIsDead(AndSCC.isDead());
+
+  SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
   And->eraseFromParent();
-  LIS->InsertMachineInstrInMaps(*Andn2);
 
   LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
 
+  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
+  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+
+  LiveInterval *CmpLI =
+      CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+
   // Try to remove compare. Cmp value should not used in between of cmp
   // and s_and_b64 if VCC or just unused if any other register.
-  if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+  if ((CmpReg.isVirtual() && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
       (CmpReg == Register(CondReg) &&
        std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
                     [&](const MachineInstr &MI) {
                       return MI.readsRegister(CondReg, TRI);
                     }))) {
     LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
-
+    if (CmpLI)
+      LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
     LIS->RemoveMachineInstrFromMaps(*Cmp);
     Cmp->eraseFromParent();
 
+    LiveInterval *SelLI =
+        SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
     // Try to remove v_cndmask_b32.
-    if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
+    if (SelLI && SelLI->Query(CmpIdx.getRegSlot()).isKill()) {
       LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
 
+      if (SelLI)
+        LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
       LIS->RemoveMachineInstrFromMaps(*Sel);
       Sel->eraseFromParent();
     }
   }
 
-  return CCReg;
+  if (CCReg.isVirtual()) {
+    LiveInterval &CCLI = LIS->getInterval(CCReg);
+    auto CCQ = CCLI.Query(SelIdx.getRegSlot());
+    if (CCQ.valueIn()) {
+      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+                                         AndIdx.getRegSlot(), CCQ.valueIn()));
+    }
+
+    if (CC->getSubReg()) {
+      LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
+      BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+      CCLI.refineSubRanges(
+          Allocator, Mask,
+          [=](LiveInterval::SubRange &SR) {
+            auto CCQS = SR.Query(SelIdx.getRegSlot());
+            if (CCQS.valueIn()) {
+              SR.addSegment(LiveRange::Segment(
+                  SelIdx.getRegSlot(), AndIdx.getRegSlot(), CCQS.valueIn()));
+            }
+          },
+          *LIS->getSlotIndexes(), *TRI);
+      CCLI.removeEmptySubRanges();
+
+      SmallVector<LiveInterval *> SplitLIs;
+      LIS->splitSeparateComponents(CCLI, SplitLIs);
+    }
+  } else
+    LIS->removeAllRegUnitsForPhysReg(CCReg);
+
+  return true;
 }
 
 // Optimize sequence
@@ -330,8 +370,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
       Changed = true;
     }
 
-    if (Register Reg = optimizeVcndVcmpPair(MBB)) {
-      RecalcRegs.insert(Reg);
+    if (optimizeVcndVcmpPair(MBB)) {
       RecalcRegs.insert(AMDGPU::VCC_LO);
       RecalcRegs.insert(AMDGPU::VCC_HI);
       RecalcRegs.insert(AMDGPU::SCC);

diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
new file mode 100644
index 0000000000000..be1e36c05a30d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
@@ -0,0 +1,172 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-regalloc -misched-only-block=999 -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# This run line is a total hack to keep the live intervals analysis alive
+# until the verifier runs. An asserts build is required in order to use
+# -misched-only-block. We use the scheduler only because -start-before
+# doesn't see si-optimize-exec-masking-pre-ra unless the scheduler is
+# part of the pass pipeline.
+
+---
+name: subreg_value_undef
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: subreg_value_undef
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT:   undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+  ; CHECK-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, undef %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT:   %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_NOP 0, implicit %2.sub1
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sgpr_128 = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_0
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: needs_distribute_0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT:   undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT:   dead %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+...
+
+---
+name: needs_distribute_1
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: needs_distribute_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT:   undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT:   %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_NOP 0, implicit %2.sub1
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_2
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: needs_distribute_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT:   undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT:   %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_NOP 0, implicit %2
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2
+...
+
+---
+name: needs_distribute_3
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: needs_distribute_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+  ; CHECK-NEXT:   undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+  ; CHECK-NEXT:   $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+  ; CHECK-NEXT:   %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   S_NOP 0, implicit %2.sub0
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+    undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+    %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+    $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+    %2.sub1:sreg_64_xexec = COPY %1.sub0
+    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+  bb.1:
+    S_NOP 0, implicit %2.sub0
+...

diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
new file mode 100644
index 0000000000000..501f1654d7c86
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -0,0 +1,357 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-regalloc -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# Make sure liveness is correctly updated when folding the cndmask and
+# compare.
+
+---
+name:            cndmask_same_block
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_same_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name:            cndmask_separate_block
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_separate_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name:            cndmask_same_block_other_cmpreg_use
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_same_block_other_cmpreg_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT:   S_NOP 0, implicit [[V_CMP_NE_U32_e64_]]
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    S_NOP 0, implicit %2
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name:            cndmask_same_block_liveout_use
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_same_block_liveout_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit [[V_CNDMASK_B32_e64_]]
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0, implicit %1
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+# The legality check for removing the compare used to rely on
+# use_nodbg_empty, which fails on the undef use of %2. We would then
+# fail to update the interval correctly.
+
+---
+name:            cmp_reg_extra_undef_use
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cmp_reg_extra_undef_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, undef %2:sreg_64_xexec, implicit-def dead $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  bb.0:
+    %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+  bb.1:
+    $vcc = S_AND_B64 $exec, undef %2, implicit-def dead $scc
+
+  bb.3:
+
+...
+
+# use_nodbg_empty is insufficient for erasing %1's def when removing
+# V_CNDMASK_B32.
+
+---
+name:            cndmask_undef_extra_use
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_undef_extra_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit undef %1:vgpr_32
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = IMPLICIT_DEF
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0, implicit undef %1
+    S_BRANCH %bb.4
+
+  bb.4:
+...
+
+---
+name:            cndmask_is_undef
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: cndmask_is_undef
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead %0:sreg_64_xexec = S_MOV_B64 0
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, undef %0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  bb.0:
+    %0:sreg_64_xexec = S_MOV_B64 0
+    S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+  bb.1:
+    S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+    S_BRANCH %bb.4
+
+  bb.2:
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+    $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+    S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_BRANCH %bb.4
+
+  bb.4:
+...


        

