[llvm] r357805 - [AMDGPU] Add MachineDCE pass after RenameIndependentSubregs

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 5 13:11:32 PDT 2019


Author: rampitec
Date: Fri Apr  5 13:11:32 2019
New Revision: 357805

URL: http://llvm.org/viewvc/llvm-project?rev=357805&view=rev
Log:
[AMDGPU] Add MachineDCE pass after RenameIndependentSubregs

The DetectDeadLanes pass can create some dead defs. RenameIndependentSubregs
will then break up a REG_SEQUENCE which may use these dead defs. At this point
the dead instructions could be removed, but DCE is no longer run.

MachineDCE was only running before live variable analysis. The patch
adds a means to preserve LiveIntervals and SlotIndexes in case it runs
after that point.

Differential Revision: https://reviews.llvm.org/D59626

Added:
    llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
Modified:
    llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
    llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
    llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
    llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
    llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll

Modified: llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp (original)
+++ llvm/trunk/lib/CodeGen/DeadMachineInstructionElim.cpp Fri Apr  5 13:11:32 2019
@@ -10,7 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -32,6 +34,7 @@ namespace {
     const TargetRegisterInfo *TRI;
     const MachineRegisterInfo *MRI;
     const TargetInstrInfo *TII;
+    LiveIntervals *LIS;
     BitVector LivePhysRegs;
 
   public:
@@ -41,7 +44,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
+      AU.setPreservesAll();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -78,9 +81,15 @@ bool DeadMachineInstructionElim::isDead(
       unsigned Reg = MO.getReg();
       if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
         // Don't delete live physreg defs, or any reserved register defs.
-        if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
+        // Do not remove physreg defs if we have LIS as we may be unable
+        // to accurately recompute its liveness.
+        if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg) || LIS)
           return false;
       } else {
+        // An instruction can also use its def in case if it is a tied operand.
+        // TODO: Technically we can also remove it if def dominates the use.
+        //       This can happen when two instructions define different subregs
+        //       of the same register.
         for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
           if (&Use != MI)
             // This def has a non-debug use. Don't delete the instruction!
@@ -102,6 +111,8 @@ bool DeadMachineInstructionElim::runOnMa
   MRI = &MF.getRegInfo();
   TRI = MF.getSubtarget().getRegisterInfo();
   TII = MF.getSubtarget().getInstrInfo();
+  LIS = getAnalysisIfAvailable<LiveIntervals>();
+  DenseSet<unsigned> RecalcRegs;
 
   // Loop over all instructions in all blocks, from bottom to top, so that it's
   // more likely that chains of dependent but ultimately dead instructions will
@@ -127,6 +138,14 @@ bool DeadMachineInstructionElim::runOnMa
       // If the instruction is dead, delete it!
       if (isDead(MI)) {
         LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+        if (LIS) {
+          for (const MachineOperand &MO : MI->operands()) {
+            if (MO.isReg() && TRI->isVirtualRegister(MO.getReg()))
+              RecalcRegs.insert(MO.getReg());
+          }
+          LIS->RemoveMachineInstrFromMaps(*MI);
+        }
+
         // It is possible that some DBG_VALUE instructions refer to this
         // instruction.  They get marked as undef and will be deleted
         // in the live debug variable analysis.
@@ -170,5 +189,12 @@ bool DeadMachineInstructionElim::runOnMa
   }
 
   LivePhysRegs.clear();
+
+  for (auto Reg : RecalcRegs) {
+    LIS->removeInterval(Reg);
+    if (!MRI->reg_empty(Reg))
+      LIS->createAndComputeVirtRegInterval(Reg);
+  }
+
   return AnyChanges;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Fri Apr  5 13:11:32 2019
@@ -163,6 +163,12 @@ static cl::opt<bool> EnableSIModeRegiste
   cl::init(true),
   cl::Hidden);
 
+// Option is used in lit tests to prevent deadcoding of patterns inspected.
+static cl::opt<bool>
+EnableDCEInRA("amdgpu-dce-in-ra",
+    cl::init(true), cl::Hidden,
+    cl::desc("Enable machine DCE inside regalloc"));
+
 extern "C" void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -901,6 +907,9 @@ void GCNPassConfig::addOptimizedRegAlloc
   // This must be run just after RegisterCoalescing.
   insertPass(&RegisterCoalescerID, &SIPreAllocateWWMRegsID, false);
 
+  if (EnableDCEInRA)
+    insertPass(&RenameIndependentSubregsID, &DeadMachineInstructionElimID);
+
   TargetPassConfig::addOptimizedRegAlloc();
 }
 

Added: llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir?rev=357805&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/dead-lane.mir Fri Apr  5 13:11:32 2019
@@ -0,0 +1,18 @@
+# RUN: llc -march=amdgcn -mcpu=tonga %s -start-before detect-dead-lanes -stop-before machine-scheduler -verify-machineinstrs -o - | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: name: dead_lane
+# GCN:      bb.0:
+# GCN-NEXT: undef %3.sub0:vreg_64 = V_MAC_F32_e32 undef %1:vgpr_32, undef %1:vgpr_32, undef %3.sub0, implicit $exec
+# GCN-NEXT: FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0,
+---
+name:            dead_lane
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %1:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
+    %2:vgpr_32 = V_MAC_F32_e32 undef %0:vgpr_32, undef %0:vgpr_32, undef %0:vgpr_32, implicit $exec
+    %3:vreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1
+    FLAT_STORE_DWORD undef %4:vreg_64, %3.sub0, 0, 0, 0, implicit $exec, implicit $flat_scr
+    S_ENDPGM 0
+
+...

Modified: llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/salu-to-valu.ll Fri Apr  5 13:11:32 2019
@@ -475,6 +475,7 @@ bb2:
 
 bb4:
   %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
+  store volatile i32 %tmp5, i32 addrspace(1)* undef
   br label %bb1
 }
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sdwa-peephole.ll Fri Apr  5 13:11:32 2019
@@ -523,5 +523,6 @@ bb2:
 
 bb11:                                             ; preds = %bb10, %bb2
   %tmp12 = phi <2 x i32> [ %tmp6, %bb2 ], [ %tmp, %bb1 ]
+  store volatile <2 x i32> %tmp12, <2 x i32> addrspace(1)* undef
   br label %bb1
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/shrink-carry.mir Fri Apr  5 13:11:32 2019
@@ -21,6 +21,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_SUBBREV_U32_e64 0, %0, %3, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
 
 ...
 
@@ -45,6 +46,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_SUBB_U32_e64 %0, 0, %3, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
 
 ...
 
@@ -69,6 +71,7 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_ADDC_U32_e64 0, %0, %3, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
 
 ...
 
@@ -93,5 +96,6 @@ body:             |
     %2 = IMPLICIT_DEF
     %3 = V_CMP_GT_U32_e64 %0, %1, implicit $exec
     %4, %5 = V_ADDC_U32_e64 %0, 0, %3, 0, implicit $exec
+    GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, %4, 0, 0, 0, implicit $exec
 
 ...

Modified: llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/spill-empty-live-interval.mir Fri Apr  5 13:11:32 2019
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-dce-in-ra=0 -verify-machineinstrs -stress-regalloc=1 -start-before=simple-register-coalescing -stop-after=greedy -o - %s | FileCheck %s
 # https://bugs.llvm.org/show_bug.cgi?id=33620
 
 ---

Modified: llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll?rev=357805&r1=357804&r2=357805&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll Fri Apr  5 13:11:32 2019
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -amdgpu-dce-in-ra=0 -o - %s | FileCheck %s
 ; Don't crash when the use of an undefined value is only detected by the
 ; register coalescer because it is hidden with subregister insert/extract.
 target triple="amdgcn--"




More information about the llvm-commits mailing list