[llvm] 08803f0 - Unbundle KILL bundles in VirtRegRewriter

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 10 11:58:53 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-08-10T11:58:37-07:00
New Revision: 08803f0e62e82539756b94e2655dabdbdab39014

URL: https://github.com/llvm/llvm-project/commit/08803f0e62e82539756b94e2655dabdbdab39014
DIFF: https://github.com/llvm/llvm-project/commit/08803f0e62e82539756b94e2655dabdbdab39014.diff

LOG: Unbundle KILL bundles in VirtRegRewriter

SplitKit forms invalid COPY subreg bundles without a leading
BUNDLE instruction. That manifests itself in the post-RA scheduler
counting instructions and asserting on "Instruction count mismatch".

The bundle should be undone by VirtRegRewriter::expandCopyBundle(),
but it is not, because VirtRegRewriter::handleIdentityCopy() can
turn a COPY bundle into a KILL bundle.

Process KILLs as well.

Differential Revision: https://reviews.llvm.org/D85484

Added: 
    llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir

Modified: 
    llvm/lib/CodeGen/VirtRegMap.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 2c83f13b651b..4837cf11bc33 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -400,18 +400,18 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
 /// after processing the last in the bundle. Does not update LiveIntervals
 /// which we shouldn't need for this instruction anymore.
 void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
-  if (!MI.isCopy())
+  if (!MI.isCopy() && !MI.isKill())
     return;
 
   if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
     SmallVector<MachineInstr *, 2> MIs({&MI});
 
-    // Only do this when the complete bundle is made out of COPYs.
+    // Only do this when the complete bundle is made out of COPYs and KILLs.
     MachineBasicBlock &MBB = *MI.getParent();
     for (MachineBasicBlock::reverse_instr_iterator I =
          std::next(MI.getReverseIterator()), E = MBB.instr_rend();
          I != E && I->isBundledWithSucc(); ++I) {
-      if (!I->isCopy())
+      if (!I->isCopy() && !I->isKill())
         return;
       MIs.push_back(&*I);
     }

diff  --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
new file mode 100644
index 000000000000..dca3150b404c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -0,0 +1,113 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,RA %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,VR %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=ASM %s
+
+---
+# MIR-LABEL: name: splitkit_copy_bundle
+
+# RA:      undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
+# RA-NEXT:       internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT:       internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29
+# RA-NEXT: }
+
+# RA:      undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
+# RA-NEXT:       internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT:       internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29
+# RA-NEXT: }
+
+
+# RA:      undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 {
+# RA-NEXT:       internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT:       internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29
+# RA-NEXT: }
+
+
+# VR:         renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+# VR-NEXT:    renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97
+
+# ASM-LABEL: {{^}}splitkit_copy_bundle:
+# ASM:      ; implicit-def: $sgpr34_sgpr35
+# ASM-NEXT: ; implicit-def: $sgpr98_sgpr99
+# ASM-NEXT: ; kill: def $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 killed $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+# ASM-NEXT: ; kill: def $sgpr96_sgpr97 killed $sgpr96_sgpr97
+
+name:            splitkit_copy_bundle
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = IMPLICIT_DEF
+    undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
+    %2.sub0:sgpr_1024 = S_MOV_B32 -1
+    undef %3.sub0:sgpr_1024 = S_MOV_B32 0
+
+  bb.1:
+    %2.sub2:sgpr_1024 = COPY %2.sub0
+    %2.sub3:sgpr_1024 = COPY %2.sub1
+    %2.sub4:sgpr_1024 = COPY %2.sub0
+    %2.sub5:sgpr_1024 = COPY %2.sub1
+    %2.sub6:sgpr_1024 = COPY %2.sub0
+    %2.sub7:sgpr_1024 = COPY %2.sub1
+    %2.sub8:sgpr_1024 = COPY %2.sub0
+    %2.sub9:sgpr_1024 = COPY %2.sub1
+    %2.sub10:sgpr_1024 = COPY %2.sub0
+    %2.sub11:sgpr_1024 = COPY %2.sub1
+    %2.sub12:sgpr_1024 = COPY %2.sub0
+    %2.sub13:sgpr_1024 = COPY %2.sub1
+    %2.sub14:sgpr_1024 = COPY %2.sub0
+    %2.sub15:sgpr_1024 = COPY %2.sub1
+    %2.sub16:sgpr_1024 = COPY %2.sub0
+    %2.sub17:sgpr_1024 = COPY %2.sub1
+    %2.sub18:sgpr_1024 = COPY %2.sub0
+    %2.sub19:sgpr_1024 = COPY %2.sub1
+    %2.sub20:sgpr_1024 = COPY %2.sub0
+    %2.sub21:sgpr_1024 = COPY %2.sub1
+    %2.sub22:sgpr_1024 = COPY %2.sub0
+    %2.sub23:sgpr_1024 = COPY %2.sub1
+    %2.sub24:sgpr_1024 = COPY %2.sub0
+    %2.sub25:sgpr_1024 = COPY %2.sub1
+    %2.sub26:sgpr_1024 = COPY %2.sub0
+    %2.sub27:sgpr_1024 = COPY %2.sub1
+    %2.sub28:sgpr_1024 = COPY %2.sub0
+    %2.sub29:sgpr_1024 = COPY %2.sub1
+    %3.sub1:sgpr_1024 = COPY %3.sub0
+    %3.sub2:sgpr_1024 = COPY %3.sub0
+    %3.sub3:sgpr_1024 = COPY %3.sub0
+    %3.sub4:sgpr_1024 = COPY %3.sub0
+    %3.sub5:sgpr_1024 = COPY %3.sub0
+    %3.sub6:sgpr_1024 = COPY %3.sub0
+    %3.sub7:sgpr_1024 = COPY %3.sub0
+    %3.sub8:sgpr_1024 = COPY %3.sub0
+    %3.sub9:sgpr_1024 = COPY %3.sub0
+    %3.sub10:sgpr_1024 = COPY %3.sub0
+    %3.sub11:sgpr_1024 = COPY %3.sub0
+    %3.sub12:sgpr_1024 = COPY %3.sub0
+    %3.sub13:sgpr_1024 = COPY %3.sub0
+    %3.sub14:sgpr_1024 = COPY %3.sub0
+    %3.sub15:sgpr_1024 = COPY %3.sub0
+    %3.sub16:sgpr_1024 = COPY %3.sub0
+    %3.sub17:sgpr_1024 = COPY %3.sub0
+    %3.sub18:sgpr_1024 = COPY %3.sub0
+    %3.sub19:sgpr_1024 = COPY %3.sub0
+    %3.sub20:sgpr_1024 = COPY %3.sub0
+    %3.sub21:sgpr_1024 = COPY %3.sub0
+    %3.sub22:sgpr_1024 = COPY %3.sub0
+    %3.sub23:sgpr_1024 = COPY %3.sub0
+    %3.sub24:sgpr_1024 = COPY %3.sub0
+    %3.sub25:sgpr_1024 = COPY %3.sub0
+    %3.sub26:sgpr_1024 = COPY %3.sub0
+    %3.sub27:sgpr_1024 = COPY %3.sub0
+    %3.sub28:sgpr_1024 = COPY %3.sub0
+    %3.sub29:sgpr_1024 = COPY %3.sub0
+    %3.sub30:sgpr_1024 = COPY %3.sub0
+    %3.sub31:sgpr_1024 = COPY %3.sub0
+
+  bb.2:
+    S_NOP 0, implicit %0, implicit %1, csr_amdgpu_highregs
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+...


        


More information about the llvm-commits mailing list