[llvm] d886da0 - RegisterCoalescer: Prune undef subranges from copy pairs in loops

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 3 10:42:59 PST 2021


Author: Matt Arsenault
Date: 2021-02-03T13:42:53-05:00
New Revision: d886da042c65e7bac3dc77215abf2f29b6a87de6

URL: https://github.com/llvm/llvm-project/commit/d886da042c65e7bac3dc77215abf2f29b6a87de6
DIFF: https://github.com/llvm/llvm-project/commit/d886da042c65e7bac3dc77215abf2f29b6a87de6.diff

LOG: RegisterCoalescer: Prune undef subranges from copy pairs in loops

If we had a pair of copies inside a loop which introduced new liveness
to a subregister that was undef before the loop, we would be left with a
dummy phi-only segment across the loop body. Later, this false segment
would confuse RenameIndependentSubregs, causing it to introduce
IMPLICIT_DEFs with broken value numbering.

It seems that always adding the lanes to ShrinkMask is OK, so any
conditions guarding this should be purely a compile-time filter.
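
For illustration, here is a minimal sketch of the pattern being fixed
(condensed from the prune_subrange_phi_value_0 MIR test re-enabled below;
register numbers follow that test): only sub1 of %0 is defined before the
loop, and the copy pair in bb.1 round-trips the full 64-bit register, which
previously left a phi-only segment for the never-defined sub0 lane across
the loop body.

    bb.0:
      ; Only sub1 is ever written; sub0 of %0 remains undef.
      undef %0.sub1:vreg_64 = COPY killed $vgpr0

    bb.1:
      ; The copy pair round-trips the whole 64-bit register. Coalescing it
      ; used to leave a dummy phi-only segment for the undef sub0 lane here.
      %1:vreg_64 = COPY killed %0
      %0:vreg_64 = COPY %1
      S_CBRANCH_EXECNZ %bb.1, implicit $exec
      S_BRANCH %bb.2

    bb.2:
      undef %2.sub1:vreg_64 = COPY %0.sub1
      %0:vreg_64 = COPY killed %2
      S_BRANCH %bb.1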

Added: 
    llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll

Modified: 
    llvm/lib/CodeGen/RegisterCoalescer.cpp
    llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 7fdc85a6e444..6b87681528ee 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3114,6 +3114,13 @@ void JoinVals::pruneValues(JoinVals &Other,
   }
 }
 
+// Check if the segment consists of a copied live-through value (i.e. the copy
+// in the block only extended the liveness of an undef value which we may need
+// to handle).
+static bool isLiveThrough(const LiveQueryResult Q) {
+  return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
+}
+
 /// Consider the following situation when coalescing the copy between
 /// %31 and %45 at 800. (The vertical lines represent live range segments.)
 ///
@@ -3196,11 +3203,21 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
           // with V.OtherVNI.
           LIS->extendToIndices(S, EndPoints);
         }
+
+        // We may need to eliminate the subrange if the copy introduced a live
+        // out undef value.
+        if (ValueOut->isPHIDef())
+          ShrinkMask |= S.LaneMask;
         continue;
       }
+
       // If a subrange ends at the copy, then a value was copied but only
       // partially used later. Shrink the subregister range appropriately.
-      if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
+      //
+      // Ultimately this calls shrinkToUses, so adding lanes to ShrinkMask is
+      // conservatively correct.
+      if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
+          (V.Resolution == CR_Erase && isLiveThrough(Q))) {
         LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
                           << PrintLaneMask(S.LaneMask) << " at " << Def
                           << "\n");

diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir b/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir
index 2dc2f6199f0b..5dfc7a0f2873 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir
@@ -250,57 +250,150 @@ body:             |
 
 ...
 
-# FIXME: This testcase is broken
 # The coalescing of the %0 = %2 COPY in %bb.2 needs to prune the dead
 # phi range across %bb.1 after it is erased.
-# ---
-# name: prune_subrange_phi_value_0
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     liveins: $vgpr0
+---
+name: prune_subrange_phi_value_0
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: prune_subrange_phi_value_0
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %2.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
 
-#     undef %0.sub1:vreg_64 = COPY killed $vgpr0
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
 
-#   bb.1:
-#     successors: %bb.2, %bb.1
+  bb.1:
+    successors: %bb.2, %bb.1
 
-#     %1:vreg_64 = COPY killed %0
-#     %0:vreg_64 = COPY %1
-#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
-#     S_BRANCH %bb.2
+    %1:vreg_64 = COPY killed %0
+    %0:vreg_64 = COPY %1
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
 
-#   bb.2:
-#     undef %2.sub1:vreg_64 = COPY %0.sub1
-#     %0:vreg_64 = COPY killed %2
-#     S_BRANCH %bb.1
+  bb.2:
+    undef %2.sub1:vreg_64 = COPY %0.sub1
+    %0:vreg_64 = COPY killed %2
+    S_BRANCH %bb.1
 
-# ...
+...
+
+---
+name: prune_subrange_phi_value_0_0
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: prune_subrange_phi_value_0_0
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %1:vreg_64 = COPY killed %0
+    %0:vreg_64 = COPY %1
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    undef %0.sub1:vreg_64 = COPY %0.sub1
+    S_BRANCH %bb.1
+
+...
+
+---
+name: prune_subrange_phi_value_0_1
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: prune_subrange_phi_value_0_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
+
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %1:vreg_64 = COPY killed %0
+    %0:vreg_64 = COPY %1
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_BRANCH %bb.1
+
+...
 
 # Variant of testcase that asserts since there wasn't already an
 # incoming segment at the erased copy, and no valid end point.
-# ---
-# name:            prune_subrange_phi_value_1
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     liveins: $vgpr0
+---
+name:            prune_subrange_phi_value_1
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: prune_subrange_phi_value_1
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $vgpr0
+  ; CHECK:   undef %0.sub1:vreg_64 = COPY $vgpr0
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec
+  ; CHECK:   S_BRANCH %bb.1
+  bb.0:
+    liveins: $vgpr0
 
-#     undef %0.sub1:vreg_64 = COPY killed $vgpr0
+    undef %0.sub1:vreg_64 = COPY killed $vgpr0
 
-#   bb.1:
-#     successors: %bb.2, %bb.1
+  bb.1:
+    successors: %bb.2, %bb.1
 
-#     %0:vreg_64 = COPY killed %0
-#     S_CBRANCH_EXECNZ %bb.1, implicit $exec
-#     S_BRANCH %bb.2
+    %0:vreg_64 = COPY killed %0
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
 
-#   bb.2:
-#     undef %1.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
-#     %0:vreg_64 = COPY killed %1
-#     S_BRANCH %bb.1
+  bb.2:
+    undef %1.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec
+    %0:vreg_64 = COPY killed %1
+    S_BRANCH %bb.1
 
-# ...
+...
 
 ---
 name:            prune_subrange_phi_value_2

diff --git a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll
new file mode 100644
index 000000000000..ccbfa6b03d87
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s
+
+; This example used to produce a verifier error resulting from the
+; register coalescer leaving behind a false live interval when a live
+; out copy introduced new liveness for a subregister.
+
+define <3 x float> @liveout_undef_subrange(<3 x float> %arg) {
+; CHECK-LABEL: liveout_undef_subrange:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_add_f32_e32 v3, v2, v2
+; CHECK-NEXT:    v_add_f32_e32 v1, v1, v1
+; CHECK-NEXT:    v_add_f32_e32 v0, v0, v0
+; CHECK-NEXT:  BB0_1: ; %bb1
+; CHECK-NEXT:    ; =>This Loop Header: Depth=1
+; CHECK-NEXT:    ; Child Loop BB0_2 Depth 2
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  BB0_2: ; %bb1
+; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v2
+; CHECK-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; CHECK-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; CHECK-NEXT:    s_cbranch_execnz BB0_2
+; CHECK-NEXT:  ; %bb.3: ; %bb2
+; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT:    v_mul_f32_e32 v2, v3, v2
+; CHECK-NEXT:    s_branch BB0_1
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb3, %bb
+  %i = phi <3 x float> [ %arg, %bb ], [ %i11, %bb3 ]
+  %i2 = extractelement <3 x float> %i, i64 2
+  %i3 = fmul float %i2, 1.000000e+00
+  %i4 = fmul nsz <3 x float> %arg, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  %i5 = insertelement <3 x float> undef, float %i3, i32 0
+  %i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer
+  %i7 = fmul <3 x float> %i4, %i6
+  %i8 = fcmp oeq float %i3, 0.000000e+00
+  br i1 %i8, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  br label %bb3
+
+bb3:                                             ; preds = %bb2, %bb1
+  %i11 = phi <3 x float> [ %i7, %bb2 ], [ %i, %bb1 ]
+  br label %bb1
+}


        

