[llvm] r357805 - [AMDGPU] Add MachineDCE pass after RenameIndependentSubregs
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 5 13:11:32 PDT 2019
Author: rampitec
Date: Fri Apr 5 13:11:32 2019
New Revision: 357805
URL: http://llvm.org/viewvc/llvm-project?rev=357805&view=rev
Log:
[AMDGPU] Add MachineDCE pass after RenameIndependentSubregs
The DetectDeadLanes pass can create some dead defs. Then RenameIndependentSubregs
will break a REG_SEQUENCE which may use these dead defs. At this point
a dead instruction can be removed but we do not run a DCE anymore.
MachineDCE was only running before live variable analysis. The patch
adds a means to preserve LiveIntervals and SlotIndexes in case it runs
after that point.
Differential Revision: https://reviews.llvm.org/D59626
Added:
llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
Modified:
llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
Modified: llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp (original)
+++ llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp Fri Apr 5 13:11:32 2019
@@ -10,7 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -32,6 +34,7 @@ namespace {
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
BitVector LivePhysRegs;
public:
@@ -41,7 +44,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
+ AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -78,9 +81,15 @@ bool DeadMachineInstructionElim::isDead(
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
- if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+ // Do not remove physreg defs if we have LIS as we may be unable
+ // to accurately recompute its liveness.
+ if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS)
return false;
} else {
+ // An instruction can also use its def in case if it is a tied operand.
+ // TODO: Technically we can also remove it if def dominates the use.
+ // This can happen when two instructions define different subregs
+ // of the same register.
for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
if (&Use != MI)
// This def has a non-debug use. Don't delete the instruction!
@@ -102,6 +111,8 @@ bool DeadMachineInstructionElim::runOnMa
MRI = &MF.getRegInfo();
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ DenseSet<unsigned> RecalcRegs;
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -127,6 +138,14 @@ bool DeadMachineInstructionElim::runOnMa
// If the instruction is dead, delete it!
if (isDead(MI)) {
LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ if (LIS) {
+ for (const MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && TRI->isVirtualRegister(MO.getReg()))
+ RecalcRegs.insert(MO.getReg());
+ }
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ }
+
// It is possible that some DBG_VALUE instructions refer to this
// instruction. They get marked as undef and will be deleted
// in the live debug variable analysis.
@@ -170,5 +189,12 @@ bool DeadMachineInstructionElim::runOnMa
}
LivePhysRegs.clear();
+
+ for (auto Reg : RecalcRegs) {
+ LIS->removeInterval(Reg);
+ if (!MRI->reg_empty(Reg))
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+
return AnyChanges;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Apr 5 13:11:32 2019
@@ -163,6 +163,12 @@ static cl::opt<bool> EnableSIModeRegiste
cl::init(true),
cl::Hidden);
+// Option is used in lit tests to prevent deadcoding of patterns inspected.
+static cl::opt<bool>
+EnableDCEInRA("amdgpu-dce-in-ra",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable machine DCE inside regalloc"));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -901,6 +907,9 @@ void GCNPassConfig::addOptimizedRegAlloc
// This must be run just after RegisterCoalescing.
insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
+ if (EnableDCEInRA)
+ insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
+
TargetPassConfig::addOptimizedRegAlloc();
}
Added: llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir?rev=357805&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir Fri Apr 5 13:11:32 2019
@@ -0,0 +1,18 @@
+# RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: dead_lane
+# GCN: bb.0:
+# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec
+# GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0,
+---
+name: dead_lane
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
+ %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
+ %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
+ FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ S_ENDPGM 0
+
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Fri Apr 5 13:11:32 2019
@@ -475,6 +475,7 @@ bb2:
bb4:
%tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
+ store volatile i32 %tmp5, i32 addrspace(1)* undef
br label %bb1
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Fri Apr 5 13:11:32 2019
@@ -523,5 +523,6 @@ bb2:
bb11: ; preds = %bb10, %bb2
%tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ]
+ store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
br label %bb1
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir Fri Apr 5 13:11:32 2019
@@ -21,6 +21,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
...
@@ -45,6 +46,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
...
@@ -69,6 +71,7 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
...
@@ -93,5 +96,6 @@ body: |
%2 = IMPLICIT_DEF
%3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
%4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
...
Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir Fri Apr 5 13:11:32 2019
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
# https://bugs.llvm.org/show_bug.cgi?id=33620
---
Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll Fri Apr 5 13:11:32 2019
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
; Don't crash when the use of an undefined value is only detected by the
; register coalescer because it is hidden with subregister insert/extract.
target triple="amdgcn--"
More information about the llvm-commits
mailing list