[llvm] c54c760 - Prevent dead uses in register coalescer after rematerialization

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 21 15:20:14 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-07-21T15:19:55-07:00
New Revision: c54c76037b9d7e611fc3c2955b2b9a7934fbb957

URL: https://github.com/llvm/llvm-project/commit/c54c76037b9d7e611fc3c2955b2b9a7934fbb957
DIFF: https://github.com/llvm/llvm-project/commit/c54c76037b9d7e611fc3c2955b2b9a7934fbb957.diff

LOG: Prevent dead uses in register coalescer after rematerialization

The coalescer does not check whether register uses are available
at the point of rematerialization. If it attempts to rematerialize
an instruction with such uses, it can end up with a use without a def.

LiveRangeEdit performs such a check during rematerialization, so just
call LiveRangeEdit::allUsesAvailableAt() to avoid the problem.

Differential Revision: https://reviews.llvm.org/D106396

Added: 
    llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir

Modified: 
    llvm/include/llvm/CodeGen/LiveRangeEdit.h
    llvm/lib/CodeGen/RegisterCoalescer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index 5b26a44b4ca0..fa4e80179eec 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -97,11 +97,6 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
   /// scanRemattable - Identify the Parent values that may rematerialize.
   void scanRemattable(AAResults *aa);
 
-  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
-  /// OrigIdx are also available with the same value at UseIdx.
-  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
-                          SlotIndex UseIdx) const;
-
   /// foldAsLoad - If LI has a single use and a single def that can be folded as
   /// a load, eliminate the register by folding the def into the use.
   bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead);
@@ -207,6 +202,11 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
     explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
   };
 
+  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+  /// OrigIdx are also available with the same value at UseIdx.
+  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                          SlotIndex UseIdx) const;
+
   /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
   /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
   /// When cheapAsAMove is set, only cheap remats are allowed.

diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ce5fa2cca647..7daa67793754 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -201,6 +201,11 @@ namespace {
     /// Recursively eliminate dead defs in DeadDefs.
     void eliminateDeadDefs();
 
+    /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+    /// OrigIdx are also available with the same value at UseIdx.
+    bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                            SlotIndex UseIdx);
+
     /// LiveRangeEdit callback for eliminateDeadDefs().
     void LRE_WillEraseInstruction(MachineInstr *MI) override;
 
@@ -604,6 +609,14 @@ void RegisterCoalescer::eliminateDeadDefs() {
                 nullptr, this).eliminateDeadDefs(DeadDefs);
 }
 
+bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                           SlotIndex OrigIdx,
+                                           SlotIndex UseIdx) {
+  SmallVector<Register, 8> NewRegs;
+  return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
+      .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
+}
+
 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
   // MI may be in WorkList. Make sure we don't visit it.
   ErasedInstrs.insert(MI);
@@ -1343,6 +1356,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     }
   }
 
+  if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+    return false;
+
   DebugLoc DL = CopyMI->getDebugLoc();
   MachineBasicBlock *MBB = CopyMI->getParent();
   MachineBasicBlock::iterator MII =

diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
new file mode 100644
index 000000000000..d6f1d89206ba
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -o - -verify-coalescing -run-pass=simple-register-coalescing %s | FileCheck -check-prefix=GCN %s
+
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed.
+
+name:            no_remat_killed_src_in_inst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_in_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 killed %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed
+# after the MOV but before the COPY.
+
+name:            no_remat_killed_src_after_inst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_after_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: KILL [[V_ADD_U32_e32_]]
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    KILL %1
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Even if %1 is not killed do not rematerialize V_MOV_B32 so that we do not
+# extend %1 liverange.
+
+name:            no_remat_alive_src_in_inst_unused
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_alive_src_in_inst_unused
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Rematerialize V_MOV_B32 since %1 is available at COPY and still alive.
+
+name:            remat_alive_src_in_inst_used_and_available
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: remat_alive_src_in_inst_used_and_available
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: $vgpr0 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2, implicit %1
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...


        


More information about the llvm-commits mailing list