[llvm] b03d902 - AMDGPU: Fix invalid liveness after si-optimize-exec-masking-pre-ra
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 22 18:04:33 PDT 2022
Author: Matt Arsenault
Date: 2022-06-22T20:49:03-04:00
New Revision: b03d902b61099cc5bfe7d17bbca59fccc1fd064a
URL: https://github.com/llvm/llvm-project/commit/b03d902b61099cc5bfe7d17bbca59fccc1fd064a
DIFF: https://github.com/llvm/llvm-project/commit/b03d902b61099cc5bfe7d17bbca59fccc1fd064a.diff
LOG: AMDGPU: Fix invalid liveness after si-optimize-exec-masking-pre-ra
This was leaving behind a use at the deleted instruction, which would
cause the verifier to fail during register allocation.
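For context, si-optimize-exec-masking-pre-ra folds a
v_cndmask_b32/v_cmp_ne_u32/s_and sequence into a single s_andn2 and
then tries to delete the dead cndmask and compare. A minimal sketch of
the slot-index bookkeeping the fix now relies on is below;
replaceAndErase, Old, and New are illustrative names, not part of the
patch:

    // Sketch: swap in a replacement instruction without invalidating any
    // live range that begins or ends at the old instruction's SlotIndex.
    // Assumes New was already built immediately before Old with BuildMI.
    SlotIndex replaceAndErase(LiveIntervals *LIS, MachineInstr *Old,
                              MachineInstr *New) {
      // Reusing Old's index (instead of RemoveMachineInstrFromMaps followed
      // by InsertMachineInstrInMaps) keeps existing segment endpoints valid.
      SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(*Old, *New);
      Old->eraseFromParent();
      return Idx;
    }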
Added:
llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
Modified:
llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 12e6969be34b6..aba262dfa693f 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -39,7 +39,7 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
MCRegister CondReg;
MCRegister ExecReg;
- Register optimizeVcndVcmpPair(MachineBasicBlock &MBB);
+ bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
bool optimizeElseBranch(MachineBasicBlock &MBB);
public:
@@ -90,8 +90,8 @@ static bool isDefBetween(const LiveRange &LR, SlotIndex AndIdx,
static bool isDefBetween(const SIRegisterInfo &TRI,
LiveIntervals *LIS, Register Reg,
const MachineInstr &Sel, const MachineInstr &And) {
- SlotIndex AndIdx = LIS->getInstructionIndex(And);
- SlotIndex SelIdx = LIS->getInstructionIndex(Sel);
+ SlotIndex AndIdx = LIS->getInstructionIndex(And).getRegSlot();
+ SlotIndex SelIdx = LIS->getInstructionIndex(Sel).getRegSlot();
if (Reg.isVirtual())
return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
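A note on the getRegSlot() change above: a SlotIndex for an
instruction names a group of slots (block boundary, early-clobber,
register, dead), and ordinary defs and uses take effect at the
register slot, so range comparisons at the bare instruction index can
be off by a slot. A hedged sketch of the idiom, with isLiveAtInstr as
an invented helper:

    // Sketch: query a virtual register's liveness at the point where a
    // normal (non-early-clobber) def or use actually takes effect.
    bool isLiveAtInstr(LiveIntervals *LIS, Register Reg,
                       const MachineInstr &MI) {
      SlotIndex Idx = LIS->getInstructionIndex(MI).getRegSlot();
      return LIS->getInterval(Reg).liveAt(Idx);
    }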
@@ -119,21 +119,20 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
// required part of the pattern since V_CNDMASK_B32 writes zeroes for inactive
// lanes.
//
-// Returns %cc register on success.
-Register
-SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
+// Returns true on success.
+bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
auto I = llvm::find_if(MBB.terminators(), [](const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
return Opc == AMDGPU::S_CBRANCH_VCCZ ||
Opc == AMDGPU::S_CBRANCH_VCCNZ; });
if (I == MBB.terminators().end())
- return Register();
+ return false;
auto *And =
TRI->findReachingDef(CondReg, AMDGPU::NoSubRegister, *I, *MRI, LIS);
if (!And || And->getOpcode() != AndOpc ||
!And->getOperand(1).isReg() || !And->getOperand(2).isReg())
- return Register();
+ return false;
MachineOperand *AndCC = &And->getOperand(1);
Register CmpReg = AndCC->getReg();
@@ -143,49 +142,49 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
CmpReg = AndCC->getReg();
CmpSubReg = AndCC->getSubReg();
} else if (And->getOperand(2).getReg() != Register(ExecReg)) {
- return Register();
+ return false;
}
auto *Cmp = TRI->findReachingDef(CmpReg, CmpSubReg, *And, *MRI, LIS);
if (!Cmp || !(Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e32 ||
Cmp->getOpcode() == AMDGPU::V_CMP_NE_U32_e64) ||
Cmp->getParent() != And->getParent())
- return Register();
+ return false;
MachineOperand *Op1 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src0);
MachineOperand *Op2 = TII->getNamedOperand(*Cmp, AMDGPU::OpName::src1);
if (Op1->isImm() && Op2->isReg())
std::swap(Op1, Op2);
if (!Op1->isReg() || !Op2->isImm() || Op2->getImm() != 1)
- return Register();
+ return false;
Register SelReg = Op1->getReg();
auto *Sel = TRI->findReachingDef(SelReg, Op1->getSubReg(), *Cmp, *MRI, LIS);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
- return Register();
+ return false;
if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
- return Register();
+ return false;
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
if (!Op1->isImm() || !Op2->isImm() || !CC->isReg() ||
Op1->getImm() != 0 || Op2->getImm() != 1)
- return Register();
+ return false;
Register CCReg = CC->getReg();
// If there was a def between the select and the and, we would need to move it
// to fold this.
if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
- return Register();
+ return false;
+ // TODO: Guard against implicit def operands?
LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
<< *And);
- LIS->RemoveMachineInstrFromMaps(*And);
MachineInstr *Andn2 =
BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
And->getOperand(0).getReg())
@@ -196,34 +195,75 @@ SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
MachineOperand &Andn2SCC = Andn2->getOperand(3);
assert(Andn2SCC.getReg() == AMDGPU::SCC);
Andn2SCC.setIsDead(AndSCC.isDead());
+
+ SlotIndex AndIdx = LIS->ReplaceMachineInstrInMaps(*And, *Andn2);
And->eraseFromParent();
- LIS->InsertMachineInstrInMaps(*Andn2);
LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
+ SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
+ SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+
+ LiveInterval *CmpLI =
+ CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+
// Try to remove the compare. The cmp value must not be used between the cmp
// and the s_and_b64 if it is VCC, or must simply be unused for any other register.
- if ((CmpReg.isVirtual() && MRI->use_nodbg_empty(CmpReg)) ||
+ if ((CmpReg.isVirtual() && CmpLI->Query(AndIdx.getRegSlot()).isKill()) ||
(CmpReg == Register(CondReg) &&
std::none_of(std::next(Cmp->getIterator()), Andn2->getIterator(),
[&](const MachineInstr &MI) {
return MI.readsRegister(CondReg, TRI);
}))) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Cmp << '\n');
-
+ if (CmpLI)
+ LIS->removeVRegDefAt(*CmpLI, CmpIdx.getRegSlot());
LIS->RemoveMachineInstrFromMaps(*Cmp);
Cmp->eraseFromParent();
+ LiveInterval *SelLI =
+ SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
// Try to remove v_cndmask_b32.
- if (SelReg.isVirtual() && MRI->use_nodbg_empty(SelReg)) {
+ if (SelLI && SelLI->Query(CmpIdx.getRegSlot()).isKill()) {
LLVM_DEBUG(dbgs() << "Erasing: " << *Sel << '\n');
+ if (SelLI)
+ LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
LIS->RemoveMachineInstrFromMaps(*Sel);
Sel->eraseFromParent();
}
}
- return CCReg;
+ if (CCReg.isVirtual()) {
+ LiveInterval &CCLI = LIS->getInterval(CCReg);
+ auto CCQ = CCLI.Query(SelIdx.getRegSlot());
+ if (CCQ.valueIn()) {
+ CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+ AndIdx.getRegSlot(), CCQ.valueIn()));
+ }
+
+ if (CC->getSubReg()) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ CCLI.refineSubRanges(
+ Allocator, Mask,
+ [=](LiveInterval::SubRange &SR) {
+ auto CCQS = SR.Query(SelIdx.getRegSlot());
+ if (CCQS.valueIn()) {
+ SR.addSegment(LiveRange::Segment(
+ SelIdx.getRegSlot(), AndIdx.getRegSlot(), CCQS.valueIn()));
+ }
+ },
+ *LIS->getSlotIndexes(), *TRI);
+ CCLI.removeEmptySubRanges();
+
+ SmallVector<LiveInterval *> SplitLIs;
+ LIS->splitSeparateComponents(CCLI, SplitLIs);
+ }
+ } else
+ LIS->removeAllRegUnitsForPhysReg(CCReg);
+
+ return true;
}
// Optimize sequence
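The removal legality checks in the hunk above also switch from
scanning the use list (MRI->use_nodbg_empty) to asking LiveIntervals
whether the value dies at the new s_andn2. Roughly, as a sketch with
an invented helper name (valueDiesAt; Reg assumed virtual, as in the
vreg path of the patch):

    // Sketch: a value with no uses past Idx has a live segment ending
    // exactly at Idx, which LiveQueryResult reports as a kill. Undef uses
    // elsewhere don't perturb this, unlike use-list emptiness checks.
    bool valueDiesAt(LiveIntervals *LIS, Register Reg, SlotIndex Idx) {
      return LIS->getInterval(Reg).Query(Idx.getRegSlot()).isKill();
    }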
@@ -330,8 +370,7 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
Changed = true;
}
- if (Register Reg = optimizeVcndVcmpPair(MBB)) {
- RecalcRegs.insert(Reg);
+ if (optimizeVcndVcmpPair(MBB)) {
RecalcRegs.insert(AMDGPU::VCC_LO);
RecalcRegs.insert(AMDGPU::VCC_HI);
RecalcRegs.insert(AMDGPU::SCC);
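When the cndmask's condition register is read through a subregister,
only the lanes it covers need their live range extended to the new use
at the s_andn2. A rough standalone sketch of that lane-masked
extension, modeled on the refineSubRanges block added above
(extendSubRangeUse and its parameters are placeholders; both indices
are assumed to already be register slots):

    // Sketch: extend the lanes selected by SubIdx from the old use point
    // (OldUse) up to the new use point (NewUse).
    void extendSubRangeUse(LiveIntervals &LIS, const TargetRegisterInfo &TRI,
                           LiveInterval &LI, unsigned SubIdx,
                           SlotIndex OldUse, SlotIndex NewUse) {
      LaneBitmask Mask = TRI.getSubRegIndexLaneMask(SubIdx);
      LI.refineSubRanges(
          LIS.getVNInfoAllocator(), Mask,
          [&](LiveInterval::SubRange &SR) {
            // Extend whichever value was live into the old use point.
            if (VNInfo *VNI = SR.Query(OldUse).valueIn())
              SR.addSegment(LiveRange::Segment(OldUse, NewUse, VNI));
          },
          *LIS.getSlotIndexes(), TRI);
      LI.removeEmptySubRanges();
    }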
diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
new file mode 100644
index 0000000000000..be1e36c05a30d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness-wave32.mir
@@ -0,0 +1,172 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-regalloc -misched-only-block=999 -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# This run line is a total hack to get the live intervals to make it
+# to the verifier. An asserts build is required to use
+# -misched-only-block. We use the scheduler only because -start-before
+# doesn't see si-optimize-exec-masking-pre-ra unless the scheduler is
+# part of the pass pipeline.
+
+---
+name: subreg_value_undef
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: subreg_value_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+ ; CHECK-NEXT: undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+ ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, undef %2.sub0, implicit-def dead $scc
+ ; CHECK-NEXT: %2.sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit %2.sub1
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+ undef %2.sub1:sgpr_128 = S_MOV_B32 -1
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %2.sub0, implicit $exec
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ %2.sub1:sgpr_128 = COPY %1.sub0
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+ bb.1:
+ S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_0
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: needs_distribute_0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+ ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+ ; CHECK-NEXT: dead %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+ undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ %2.sub1:sreg_64_xexec = COPY %1.sub0
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+ bb.1:
+...
+
+---
+name: needs_distribute_1
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: needs_distribute_1
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+ ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+ ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit %2.sub1
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+ undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ %2.sub1:sreg_64_xexec = COPY %1.sub0
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+ bb.1:
+ S_NOP 0, implicit %2.sub1
+...
+
+---
+name: needs_distribute_2
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: needs_distribute_2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+ ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+ ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit %2
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+ undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ %2.sub1:sreg_64_xexec = COPY %1.sub0
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+ bb.1:
+ S_NOP 0, implicit %2
+...
+
+---
+name: needs_distribute_3
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: needs_distribute_3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (s128), align 8, addrspace 1)
+ ; CHECK-NEXT: undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ ; CHECK-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %2.sub0, implicit-def dead $scc
+ ; CHECK-NEXT: %2.sub1:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: S_NOP 0, implicit %2.sub0
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0, 0, 0 :: (load (s128), align 8, addrspace 1)
+ undef %2.sub0:sreg_64_xexec = S_MOV_B32 -1
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %2.sub0, implicit $exec
+ %4:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, %3, implicit $exec
+ $vcc_lo = S_AND_B32 $exec_lo, %4, implicit-def dead $scc
+ %2.sub1:sreg_64_xexec = COPY %1.sub0
+ S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+
+ bb.1:
+ S_NOP 0, implicit %2.sub0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
new file mode 100644
index 0000000000000..501f1654d7c86
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir
@@ -0,0 +1,357 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-regalloc -start-before=machine-scheduler -stop-after=greedy,0 -o - %s | FileCheck %s
+
+# Make sure liveness is correctly updated when folding the cndmask and
+# compare.
+
+---
+name: cndmask_same_block
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_same_block
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_BRANCH %bb.4
+
+ bb.4:
+...
+
+---
+name: cndmask_separate_block
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_separate_block
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_BRANCH %bb.4
+
+ bb.4:
+...
+
+---
+name: cndmask_same_block_other_cmpreg_use
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_same_block_other_cmpreg_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+ ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_]], implicit $exec
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CMP_NE_U32_e64_]]
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ S_NOP 0, implicit %2
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_BRANCH %bb.4
+
+ bb.4:
+...
+
+---
+name: cndmask_same_block_liveout_use
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_same_block_liveout_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[DEF]], implicit $exec
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_NOP 0, implicit %1
+ S_BRANCH %bb.4
+
+ bb.4:
+...
+
+# The legality check for removing the compare used to rely on
+# use_nodbg_empty, which fails on the undef use of %2. We would then
+# fail to update the interval correctly.
+
+---
+name: cmp_reg_extra_undef_use
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cmp_reg_extra_undef_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def dead $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_AND_B64 $exec, undef %2:sreg_64_xexec, implicit-def dead $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ bb.0:
+ %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %1:sreg_64_xexec, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %0, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+
+ bb.1:
+ $vcc = S_AND_B64 $exec, undef %2, implicit-def dead $scc
+
+ bb.3:
+
+...
+
+# use_nodbg_empty is insufficient for erasing %1's def when removing
+# V_CNDMASK_B32.
+
+---
+name: cndmask_undef_extra_use
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_undef_extra_use
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[DEF]], implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0, implicit undef %1:vgpr_32
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = IMPLICIT_DEF
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_NOP 0, implicit undef %1
+ S_BRANCH %bb.4
+
+ bb.4:
+...
+
+---
+name: cndmask_is_undef
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: cndmask_is_undef
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead %0:sreg_64_xexec = S_MOV_B64 0
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, undef %0, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ bb.0:
+ %0:sreg_64_xexec = S_MOV_B64 0
+ S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc
+
+ bb.1:
+ S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
+ S_BRANCH %bb.4
+
+ bb.2:
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0, implicit $exec
+ %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
+ $vcc = S_AND_B64 $exec, %2, implicit-def $scc
+ S_CBRANCH_VCCNZ %bb.4, implicit $vcc
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_BRANCH %bb.4
+
+ bb.4:
+...