[llvm-branch-commits] [llvm] 60eba81 - RegisterCoalescer: Remove phi-only subranges when erasing identity copies

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Dec 15 14:40:40 PST 2020


Author: Matt Arsenault
Date: 2020-12-15T17:36:32-05:00
New Revision: 60eba8161bd314eaf02952deaa023c334fcca080

URL: https://github.com/llvm/llvm-project/commit/60eba8161bd314eaf02952deaa023c334fcca080
DIFF: https://github.com/llvm/llvm-project/commit/60eba8161bd314eaf02952deaa023c334fcca080.diff

LOG: RegisterCoalescer: Remove phi-only subranges when erasing identity copies

Undef subranges are not present in the live range values, except when
they cross block boundaries. In this situation, a identity copy is
inside a loop, and one of the lanes is undefined. It only appears
alive inside the loop due to the copy. Once the copy was erased, it
would leave behind a segment inside the loop body with no
corresponding def anywhere in the program.

When RenameIndependentSubregs processed this dummy interval, it would
introduce a "Multiple connected components in live interval" verifier
error when IMPLICIT_DEFs were added to the other two blocks. I believe
there is a missing verifier check for this type of dummy interval.

I have found additional cases from the same fundamental problem in
other areas I haven't managed to fix yet (e.g. the commented out
prune_subrange_phi_value_* cases).

Added: 
    llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir

Modified: 
    llvm/lib/CodeGen/RegisterCoalescer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7deabe6761d9..5fee663d8978 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -211,6 +211,18 @@ namespace {
     /// live interval update is costly.
     void lateLiveIntervalUpdate();
 
+    /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
+    /// has no value defined in the predecessors. If the incoming value is the
+    /// same as defined by the copy itself, the value is considered undefined.
+    bool copyValueUndefInPredecessors(LiveRange &S,
+                                      const MachineBasicBlock *MBB,
+                                      LiveQueryResult SLRQ);
+
+    /// Set necessary undef flags on subregister uses after pruning out undef
+    /// lane segments from the subrange.
+    void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
+                                    LaneBitmask PrunedLanes);
+
     /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
     /// src/dst of the copy instruction CopyMI.  This returns true if the copy
     /// was successfully coalesced away. If it is not currently possible to
@@ -1809,6 +1821,49 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
   return false;
 }
 
+bool RegisterCoalescer::copyValueUndefInPredecessors(
+    LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
+  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+    SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
+    if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
+      // If this is a self loop, we may be reading the same value.
+      if (V->id != SLRQ.valueOutOrDead()->id)
+        return false;
+    }
+  }
+
+  return true;
+}
+
+void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
+                                                   Register Reg,
+                                                   LaneBitmask PrunedLanes) {
+  // If we had other instructions in the segment reading the undef sublane
+  // value, we need to mark them with undef.
+  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+    unsigned SubRegIdx = MO.getSubReg();
+    if (SubRegIdx == 0 || MO.isUndef())
+      continue;
+
+    LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
+    SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+    for (LiveInterval::SubRange &S : LI.subranges()) {
+      if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
+        MO.setIsUndef();
+        break;
+      }
+    }
+  }
+
+  LI.removeEmptySubRanges();
+
+  // A def of a subregister may be a use of other register lanes. Replacing
+  // such a def with a def of a 
diff erent register will eliminate the use,
+  // and may cause the recorded live range to be larger than the actual
+  // liveness in the program IR.
+  LIS->shrinkToUses(&LI);
+}
+
 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   Again = false;
   LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
@@ -1868,16 +1923,35 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
       VNInfo *ReadVNI = LRQ.valueIn();
       assert(ReadVNI && "No value before copy and no <undef> flag.");
       assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
-      LI.MergeValueNumberInto(DefVNI, ReadVNI);
+
+      // Track incoming undef lanes we need to eliminate from the subrange.
+      LaneBitmask PrunedLanes;
+      MachineBasicBlock *MBB = CopyMI->getParent();
 
       // Process subregister liveranges.
       for (LiveInterval::SubRange &S : LI.subranges()) {
         LiveQueryResult SLRQ = S.Query(CopyIdx);
         if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
-          VNInfo *SReadVNI = SLRQ.valueIn();
-          S.MergeValueNumberInto(SDefVNI, SReadVNI);
+          if (VNInfo *SReadVNI = SLRQ.valueIn())
+            SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
+
+          // If this copy introduced an undef subrange from an incoming value,
+          // we need to eliminate the undef live in values from the subrange.
+          if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
+            LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
+            PrunedLanes |= S.LaneMask;
+            S.removeValNo(SDefVNI);
+          }
         }
       }
+
+      LI.MergeValueNumberInto(DefVNI, ReadVNI);
+      if (PrunedLanes.any()) {
+        LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
+                          << PrunedLanes << '\n');
+        setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
+      }
+
       LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
     }
     deleteInstr(CopyMI);

diff  --git a/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir b/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir
new file mode 100644
index 000000000000..2dc2f6199f0b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir
@@ -0,0 +1,338 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-coalescing -verify-machineinstrs -start-before=simple-register-coalescing -stop-after=machine-scheduler -o - %s | FileCheck %s
+
+# Tests that break due to the handling of partially undef registers
+# when whole register identity copies are erased.
+
+# Make sure there is no verifier error after
+# RenameIndepependentSubregs processes this. The coalescer would
+# remove the identity copy in %bb.1, and leave behind a dummy interval
+# across bb.1 with no corresponding value anywhere in the function.
+
+---
+name:            identity_copy_undef_subrange
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: identity_copy_undef_subrange
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+...
+
+# Test another use of the register inside the block.
+---
+name:            identity_copy_undef_subrange_other_uses0
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: identity_copy_undef_subrange_other_uses0
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_NOP 0, implicit undef %0.sub0
+  ; CHECK:   S_NOP 0, implicit undef %0.sub0
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:vreg_64 = COPY %0
+    S_NOP 0, implicit %0.sub0
+    S_NOP 0, implicit %0.sub0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+...
+
+---
+name: second_identity_copy
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: second_identity_copy
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:vreg_64 = COPY killed %0
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+...
+
+---
+name: second_identity_copy_undef_lane_outblock
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: second_identity_copy_undef_lane_outblock
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:vreg_64 = COPY killed %0
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+...
+
+# The same value number appears in multiple blocks
+---
+name: identity_copy_undef_subrange_null_vninfo_to_remove
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: identity_copy_undef_subrange_null_vninfo_to_remove
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   S_NOP 0, implicit undef %0.sub0
+  ; CHECK:   undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  ; CHECK: bb.3:
+  ; CHECK:   S_NOP 0, implicit undef %0.sub0
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.3, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %0.sub0
+    undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.3:
+    S_NOP 0, implicit %0.sub0
+
+...
+
+---
+name: undef_copy_self_loop0
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: undef_copy_self_loop0
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK: bb.2:
+  ; CHECK:   S_NOP 0, implicit undef %0.sub0
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %0.sub0
+
+...
+
+---
+name: undef_copy_self_loop1
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: undef_copy_self_loop1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK: bb.2:
+  ; CHECK:   S_NOP 0, implicit %0.sub1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    %0:vreg_64 = COPY %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.2:
+    S_NOP 0, implicit %0.sub1
+
+...
+
+# FIXME: This testcase is broken
+# The coalescing of the %0 = %2 COPY in %bb.2 needs to prune the dead
+# phi range across %bb.1 after it is erased.
+# ---
+# name: prune_subrange_phi_value_0
+# tracksRegLiveness: true
+# body:             |
+#   bb.0:
+#     liveins: $vgpr0
+
+#     undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+#   bb.1:
+#     successors: %bb.2, %bb.1
+
+#     %1:vreg_64 = COPY killed %0
+#     %0:vreg_64 = COPY %1
+#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
+#     S_BRANCH %bb.2
+
+#   bb.2:
+#     undef %2.sub1:vreg_64 = COPY %0.sub1
+#     %0:vreg_64 = COPY killed %2
+#     S_BRANCH %bb.1
+
+# ...
+
+# Variant of testcase that asserts since there wasn't already an
+# incoming segment at the erased copy, and no valid end point.
+# ---
+# name:            prune_subrange_phi_value_1
+# tracksRegLiveness: true
+# body:             |
+#   bb.0:
+#     liveins: $vgpr0
+
+#     undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+#   bb.1:
+#     successors: %bb.2, %bb.1
+
+#     %0:vreg_64 = COPY killed %0
+#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
+#     S_BRANCH %bb.2
+
+#   bb.2:
+#     undef %1.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
+#     %0:vreg_64 = COPY killed %1
+#     S_BRANCH %bb.1
+
+# ...
+
+---
+name:            prune_subrange_phi_value_2
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: prune_subrange_phi_value_2
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:vreg_64 = COPY killed %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+...


        


More information about the llvm-branch-commits mailing list