[llvm] 54c0f52 - [VirtRegRewriter] Insert missing killed flags when tracking subregister liveness

Albion Fung via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 3 09:02:21 PST 2021


Author: Baptiste Saleil
Date: 2021-03-03T12:02:04-05:00
New Revision: 54c0f520c77a189c3869fbb1b9c4ee36561a58ae

URL: https://github.com/llvm/llvm-project/commit/54c0f520c77a189c3869fbb1b9c4ee36561a58ae
DIFF: https://github.com/llvm/llvm-project/commit/54c0f520c77a189c3869fbb1b9c4ee36561a58ae.diff

LOG: [VirtRegRewriter] Insert missing killed flags when tracking subregister liveness

VirtRegRewriter may sometimes fail to correctly apply the kill flag where necessary,
which causes unnecessary code gen on PowerPC. This patch fixes the way the masks for
defined lanes and for used lanes are computed.

Contact albion.fung at ibm.com instead of author for problems related to this commit.

Differential Revision: https://reviews.llvm.org/D92405

Added: 
    llvm/test/CodeGen/PowerPC/subreg-killed.mir

Modified: 
    llvm/lib/CodeGen/LiveIntervals.cpp
    llvm/test/CodeGen/AMDGPU/load-global-i16.ll
    llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir
    llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
    llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
    llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
    llvm/test/CodeGen/AMDGPU/splitkit.mir
    llvm/test/CodeGen/AMDGPU/subreg_interference.mir
    llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
    llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a32b486240c8..e858efe5a68c 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -702,9 +702,6 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
 void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
   // Keep track of regunit ranges.
   SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU;
-  // Keep track of subregister ranges.
-  SmallVector<std::pair<const LiveInterval::SubRange*,
-                        LiveRange::const_iterator>, 4> SRs;
 
   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
     Register Reg = Register::index2VirtReg(i);
@@ -724,14 +721,6 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         continue;
       RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
     }
-
-    if (MRI->subRegLivenessEnabled()) {
-      SRs.clear();
-      for (const LiveInterval::SubRange &SR : LI.subranges()) {
-        SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
-      }
-    }
-
     // Every instruction that kills Reg corresponds to a segment range end
     // point.
     for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE;
@@ -776,20 +765,18 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
         // are actually never written by %2. After assignment the <kill>
         // flag at the read instruction is invalid.
         LaneBitmask DefinedLanesMask;
-        if (!SRs.empty()) {
+        if (LI.hasSubRanges()) {
           // Compute a mask of lanes that are defined.
           DefinedLanesMask = LaneBitmask::getNone();
-          for (auto &SRP : SRs) {
-            const LiveInterval::SubRange &SR = *SRP.first;
-            LiveRange::const_iterator &I = SRP.second;
-            if (I == SR.end())
-              continue;
-            I = SR.advanceTo(I, RI->end);
-            if (I == SR.end() || I->start >= RI->end)
-              continue;
-            // I is overlapping RI
-            DefinedLanesMask |= SR.LaneMask;
-          }
+          for (const LiveInterval::SubRange &SR : LI.subranges())
+            for (const LiveRange::Segment &Segment : SR.segments) {
+              if (Segment.start >= RI->end)
+                break;
+              if (Segment.end == RI->end) {
+                DefinedLanesMask |= SR.LaneMask;
+                break;
+              }
+            }
         } else
           DefinedLanesMask = LaneBitmask::getAll();
 
@@ -799,7 +786,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
             continue;
           if (MO.isUse()) {
             // Reading any undefined lanes?
-            LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+            unsigned SubReg = MO.getSubReg();
+            LaneBitmask UseMask = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+                                         : MRI->getMaxLaneMaskForVReg(Reg);
             if ((UseMask & ~DefinedLanesMask).any())
               goto CancelKill;
           } else if (MO.getSubReg() == 0) {

diff  --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 2f131bab36c8..161f03c4b879 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -7303,8 +7303,7 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill
 ; GCN-NOHSA-SI-NEXT:    buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill
-; GCN-NOHSA-SI-NEXT:    s_waitcnt expcnt(1)
-; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v21, s0, v3
+; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v4, s0, v3
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v32, 16, v5
 ; GCN-NOHSA-SI-NEXT:    v_and_b32_e32 v30, s0, v5
 ; GCN-NOHSA-SI-NEXT:    v_lshrrev_b32_e32 v36, 16, v6
@@ -7345,7 +7344,6 @@ define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v41, v1
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v31, v1
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v33, v1
-; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v4, v21
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v6, v23
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v5, v1
 ; GCN-NOHSA-SI-NEXT:    v_mov_b32_e32 v7, v1

diff  --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir
index 821396d79c32..918e009b3bf6 100644
--- a/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir
+++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign-wave64.mir
@@ -22,7 +22,7 @@
 # GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF
 # GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr5, $vcc, implicit $exec
 # GCN: $vgpr2 = V_CNDMASK_B32_e64 0, $vgpr0, 0, $vgpr4, killed $vcc, implicit $exec
-# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 $vgpr4_vgpr5, $vgpr0_vgpr1, implicit $exec
+# GCN: $sgpr0_sgpr1 = V_CMP_LT_U64_e64 killed $vgpr4_vgpr5, killed $vgpr0_vgpr1, implicit $exec
 ---
 name:            vgpr64_mixed_use
 tracksRegLiveness: true

diff  --git a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
index 3b465c5a10fa..09366f814361 100644
--- a/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
+++ b/llvm/test/CodeGen/AMDGPU/regbank-reassign.mir
@@ -367,7 +367,7 @@ body:             |
 
 # GCN-LABEL: vgpr_lo16_sub{{$}}
 # GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
-# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16
+# GCN: renamable $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16
 ---
 name:            vgpr_lo16_sub
 tracksRegLiveness: true
@@ -404,7 +404,7 @@ body: |
 
 # GCN-LABEL: vgpr_hi16_sub{{$}}
 # GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
-# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16
+# GCN: renamable $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
 ---
 name:            vgpr_hi16_sub
 tracksRegLiveness: true
@@ -441,7 +441,7 @@ body: |
 
 # GCN-LABEL: sgpr_lo16_sub{{$}}
 # GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
-# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
+# GCN: renamable $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
 ---
 name:            sgpr_lo16_sub
 tracksRegLiveness: true
@@ -498,7 +498,7 @@ body: |
 # Test that bank of subreg is considered during scavenging.
 # If handled incorrectly an infinite loop occurs.
 # GCN-LABEL: s0_vs_s15_16_17_sub1{{$}}
-# GCN: S_AND_B32 renamable $sgpr13, $sgpr0,
+# GCN: S_AND_B32 killed renamable $sgpr13, $sgpr0,
 ---
 name:            s0_vs_s15_16_17_sub1
 tracksRegLiveness: true
@@ -531,7 +531,7 @@ body: |
 # GCN: $vgpr40_vgpr41_vgpr42_vgpr43 = IMPLICIT_DEF
 # GCN: $vgpr44_vgpr45_vgpr46_vgpr47 = IMPLICIT_DEF
 # GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr11_vgpr12, 0, killed $vgpr16_vgpr17, 0, 0, implicit $mode, implicit $exec
-# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec
+# GCN: $vgpr0_vgpr1 = V_ADD_F64_e64 0, killed $vgpr9_vgpr10, 0, killed $vgpr14_vgpr15, 0, 0, implicit $mode, implicit $exec
 ---
 name:            vgpr_sub_dependence
 tracksRegLiveness: true

diff  --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index d1fa465a66d7..afccbb4b2b71 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -16,7 +16,7 @@
 
 ; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
 ; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
-; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x{{[0-9a-f]+}}{{$}}
+; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}}
 ; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
 
 ; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill

diff  --git a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
index 072fbd6977e1..fa907f7f992e 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-nolivesubranges.mir
@@ -19,7 +19,7 @@ body: |
     ; CHECK: liveins: $sgpr0_sgpr1
     ; CHECK: renamable $sgpr0 = IMPLICIT_DEF
     ; CHECK: renamable $sgpr1 = IMPLICIT_DEF
-    ; CHECK: $sgpr104 = S_AND_B32 renamable $sgpr0, renamable $sgpr1, implicit-def $scc
+    ; CHECK: $sgpr104 = S_AND_B32 killed renamable $sgpr0, renamable $sgpr1, implicit-def $scc
     ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
     ; CHECK: renamable $sgpr0_sgpr1 = IMPLICIT_DEF
     ; CHECK: renamable $sgpr0 = IMPLICIT_DEF
@@ -27,7 +27,7 @@ body: |
     ; CHECK: SI_SPILL_S64_SAVE renamable $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sp_reg :: (store 8 into %stack.0, align 4, addrspace 5)
     ; CHECK: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
     ; CHECK: renamable $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sp_reg :: (load 8 from %stack.0, align 4, addrspace 5)
-    ; CHECK: $sgpr105 = S_AND_B32 renamable $sgpr1, renamable $sgpr1, implicit-def $scc
+    ; CHECK: $sgpr105 = S_AND_B32 killed renamable $sgpr1, renamable $sgpr1, implicit-def $scc
     ; CHECK: S_NOP 0, implicit $sgpr104, implicit $sgpr105
     %0:sreg_64 = COPY $sgpr0_sgpr1
     %0.sub0:sreg_64 = IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AMDGPU/splitkit.mir b/llvm/test/CodeGen/AMDGPU/splitkit.mir
index 6f3aac5891f9..8f4de729388c 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit.mir
+++ b/llvm/test/CodeGen/AMDGPU/splitkit.mir
@@ -39,11 +39,11 @@ body: |
 # CHECK: S_NOP 0
 # CHECK: S_NOP 0, implicit renamable [[REG0]]
 # CHECK: S_NOP 0, implicit renamable [[REG1]]
-# CHECK: $sgpr0 = COPY renamable [[REG0]]
+# CHECK: $sgpr0 = COPY killed renamable [[REG0]]
 # CHECK: $sgpr2 = COPY renamable [[REG1]]
 # CHECK: S_NOP
 # CHECK: S_NOP 0, implicit renamable $sgpr0
-# CHECK: S_NOP 0, implicit renamable $sgpr2
+# CHECK: S_NOP 0, implicit killed renamable $sgpr2
 name: func1
 tracksRegLiveness: true
 body: |

diff  --git a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir
index abcff1023496..77f1fc5c5473 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg_interference.mir
+++ b/llvm/test/CodeGen/AMDGPU/subreg_interference.mir
@@ -16,8 +16,8 @@
 # CHECK: S_NOP 0, implicit-def renamable $sgpr3
 # CHECK: S_NOP 0, implicit-def renamable $sgpr1
 # CHECK: S_NOP 0, implicit-def renamable $sgpr2
-# CHECK: S_NOP 0, implicit renamable $sgpr0, implicit renamable $sgpr3
-# CHECK: S_NOP 0, implicit renamable $sgpr1, implicit renamable $sgpr2
+# CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit renamable $sgpr3
+# CHECK: S_NOP 0, implicit killed renamable $sgpr1, implicit renamable $sgpr2
 name: func0
 body: |
   bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
index 2f876af6535d..be9f864cfc36 100644
--- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir
@@ -40,24 +40,24 @@ body:             |
     ; CHECK: renamable $sgpr13 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr14 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr15 = COPY renamable $sgpr5
-    ; CHECK: renamable $vgpr5_vgpr6 = COPY renamable $sgpr0_sgpr1
+    ; CHECK: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1
     ; CHECK: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
     ; CHECK: renamable $sgpr0 = S_MOV_B32 1200
     ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: KILL renamable $sgpr0, renamable $sgpr1
+    ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
     ; CHECK: renamable $sgpr0 = S_MOV_B32 1264
     ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: KILL renamable $sgpr0, renamable $sgpr1
+    ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
     ; CHECK: renamable $sgpr0 = S_MOV_B32 1328
     ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: KILL renamable $sgpr0, renamable $sgpr1
+    ; CHECK: KILL killed renamable $sgpr0, renamable $sgpr1
     ; CHECK: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr0 = S_MOV_B32 1392
     ; CHECK: renamable $sgpr1 = COPY renamable $sgpr5
@@ -66,10 +66,10 @@ body:             |
     ; CHECK: renamable $sgpr3 = COPY renamable $sgpr5
     ; CHECK: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0, 0 :: (dereferenceable load 32, addrspace 6)
     ; CHECK: renamable $sgpr4 = S_MOV_B32 1520
-    ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
-    ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
+    ; CHECK: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0, 0 :: (load 16, addrspace 6)
+    ; CHECK: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0, 0 :: (load 16, addrspace 6)
+    ; CHECK: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0, 0 :: (load 16, addrspace 6)
+    ; CHECK: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
     ; CHECK: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
     ; CHECK: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
     ; CHECK: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
@@ -100,7 +100,7 @@ body:             |
     ; CHECK: renamable $vgpr0 = V_MAX3_F32_e64 0, killed $vgpr0, 0, killed $vgpr1, 0, killed $vgpr3, 0, 0, implicit $mode, implicit $exec
     ; CHECK: renamable $sgpr0 = nofpexcept V_CMP_GT_F32_e64 0, 1028443341, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
     ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 0, 0, 1065353216, killed $sgpr0, implicit $exec
-    ; CHECK: EXP_DONE 12, killed renamable $vgpr0, renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec
+    ; CHECK: EXP_DONE 12, killed renamable $vgpr0, killed renamable $vgpr2, undef renamable $vgpr0, undef renamable $vgpr0, -1, 0, 15, implicit $exec
     ; CHECK: S_ENDPGM 0
     %0:vgpr_32 = COPY $vgpr0
     undef %1.sub0:sgpr_64 = COPY $sgpr0

diff  --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index 8db8ee3db357..c8505ef4427b 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -40,7 +40,7 @@ body:             |
     ; CHECK: $sgpr4 = COPY $sgpr95
     ; CHECK: $vgpr0 = COPY renamable $vgpr40
     ; CHECK: $vgpr1 = COPY renamable $vgpr41
-    ; CHECK: $vgpr2 = COPY renamable $vgpr42
+    ; CHECK: $vgpr2 = COPY killed renamable $vgpr42
     ; CHECK: $vgpr3 = KILL undef renamable $vgpr3
     ; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
     ; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95

diff  --git a/llvm/test/CodeGen/PowerPC/subreg-killed.mir b/llvm/test/CodeGen/PowerPC/subreg-killed.mir
new file mode 100644
index 000000000000..433b9549f8a6
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/subreg-killed.mir
@@ -0,0 +1,40 @@
+# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \
+# RUN:   -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \
+# RUN:   -o - %s | FileCheck %s
+
+# This test case checks that the 'killed' flag is properly added when using
+# subregisters.
+
+# CHECK-LABEL: test
+# CHECK: KILL_PAIR killed
+# CHECK-NEXT: COPY killed
+# CHECK-NEXT: KILL_PAIR killed
+# CHECK-NEXT: COPY killed
+# CHECK-NEXT: BUILD_UACC killed
+# CHECK-NEXT: XXMTACC killed
+# CHECK-NEXT: SPILL_ACC killed
+
+---
+name:            test
+tracksRegLiveness: true
+fixedStack:
+  - { id: 0, size: 8 }
+stack:
+  - { id: 0, size: 64 }
+body:             |
+  bb.0:
+    liveins: $v2, $v3, $v4, $v5
+    undef %4.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v5
+    %4.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v4
+    undef %5.sub_vsx1:vsrprc_with_sub_64_in_vfrc = COPY $v3
+    %5.sub_vsx0:vsrprc_with_sub_64_in_vfrc = COPY $v2
+    %6:g8rc_and_g8rc_nox0 = LD 0, %fixed-stack.0
+    %5:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %5
+    undef %7.sub_pair0:uaccrc = COPY %5
+    %4:vsrprc_with_sub_64_in_vfrc = KILL_PAIR %4
+    %7.sub_pair1:uaccrc = COPY %4
+    %8:accrc = BUILD_UACC %7
+    %8:accrc = XXMTACC %8
+    SPILL_ACC %8, 0, %stack.0
+
+...


        


More information about the llvm-commits mailing list