[llvm] r298025 - SplitKit: Correctly implement partial subregister copies

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 16 17:41:40 PDT 2017


Author: matze
Date: Thu Mar 16 19:41:39 2017
New Revision: 298025

URL: http://llvm.org/viewvc/llvm-project?rev=298025&view=rev
Log:
SplitKit: Correctly implement partial subregister copies

- This fixes a bug where subregister indices incompatible with the vreg's
  register class were used.
- Implement the case where multiple copies are necessary to cover a
  given lane mask.

Differential Revision: https://reviews.llvm.org/D30438
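
As a rough standalone illustration (not part of the patch), the covering
heuristic in SplitEditor::buildCopy boils down to the sketch below. Plain
uint64_t bitmasks stand in for LaneBitmask, positions in IndexMasks stand in
for target subregister indices, and the names (coverLaneMask, popCount) are
made up rather than LLVM API; buildCopy's two passes are also folded into a
single loop here.

#include <bitset>
#include <climits>
#include <cstdint>
#include <stdexcept>
#include <vector>

static int popCount(uint64_t V) {
  return static_cast<int>(std::bitset<64>(V).count());
}

// Return the sequence of "subregister indices" (positions in IndexMasks)
// whose lane masks together cover LaneMask exactly; buildCopy emits one COPY
// per chosen index.
std::vector<size_t> coverLaneMask(uint64_t LaneMask,
                                  const std::vector<uint64_t> &IndexMasks) {
  std::vector<size_t> Chosen;
  uint64_t LanesLeft = LaneMask;
  while (LanesLeft != 0) {
    size_t BestIdx = SIZE_MAX;
    // Cover may go negative: re-copying lanes that were already covered is
    // allowed but penalized, mirroring the PopCount difference in buildCopy.
    int BestCover = INT_MIN;
    for (size_t Idx = 0; Idx < IndexMasks.size(); ++Idx) {
      uint64_t SubMask = IndexMasks[Idx];
      // An index must not define lanes outside the requested mask.
      if ((SubMask & ~LaneMask) != 0)
        continue;
      // Early exit on a perfect match for the remaining lanes.
      if (SubMask == LanesLeft) {
        BestIdx = Idx;
        break;
      }
      int Cover =
          popCount(SubMask & LanesLeft) - popCount(SubMask & ~LanesLeft);
      if (Cover > BestCover) {
        BestCover = Cover;
        BestIdx = Idx;
      }
    }
    // No usable index, or no progress on the remaining lanes: the mask is
    // impossible to cover (buildCopy calls report_fatal_error here).
    if (BestIdx == SIZE_MAX || (IndexMasks[BestIdx] & LanesLeft) == 0)
      throw std::runtime_error("impossible to implement partial COPY");
    Chosen.push_back(BestIdx);
    LanesLeft &= ~IndexMasks[BestIdx];
  }
  return Chosen;
}

For example, with LaneMask = 0b1001 (think sub0 and sub3 of a four-lane
register, as in the func0 test added below) and per-lane index masks
{0b0001, 0b0010, 0b0100, 0b1000}, the loop picks the 0b0001 index first and
then finds a perfect match with 0b1000, so two COPYs are emitted.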

Added:
    llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir
Modified:
    llvm/trunk/lib/CodeGen/SplitKit.cpp
    llvm/trunk/lib/CodeGen/SplitKit.h
    llvm/trunk/lib/CodeGen/VirtRegMap.cpp

Modified: llvm/trunk/lib/CodeGen/SplitKit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.cpp?rev=298025&r1=298024&r2=298025&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.cpp (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.cpp Thu Mar 16 19:41:39 2017
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -487,12 +488,127 @@ void SplitEditor::forceRecompute(unsigne
   VFP = ValueForcePair(nullptr, true);
 }
 
+SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+    unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  bool FirstCopy = !Def.isValid();
+  MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+      .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+              | getInternalReadRegState(!FirstCopy), SubIdx)
+      .addReg(FromReg, 0, SubIdx);
+
+  BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+  if (FirstCopy) {
+    SlotIndexes &Indexes = *LIS.getSlotIndexes();
+    Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+    DestLI.createDeadDef(Def, Allocator);
+  } else {
+    CopyMI->bundleWithPred();
+  }
+  LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
+  DestLI.refineSubRanges(Allocator, LaneMask,
+                         [Def, &Allocator](LiveInterval::SubRange& SR) {
+    SR.createDeadDef(Def, Allocator);
+  });
+  return Def;
+}
+
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+    LaneBitmask LaneMask, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+    // The full vreg is copied.
+    MachineInstr *CopyMI =
+        BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+    SlotIndexes &Indexes = *LIS.getSlotIndexes();
+    return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+  }
+
+  // Only a subset of lanes needs to be copied. The following is a simple
+  // heuristic to construct a sequence of COPYs. We could add a
+  // target-specific callback if this turns out to be suboptimal.
+  LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+  // First pass: Try to find a perfectly matching subregister index. If none
+  // exists, find the one covering the most lane mask bits.
+  SmallVector<unsigned, 8> PossibleIndexes;
+  unsigned BestIdx = 0;
+  unsigned BestCover = 0;
+  const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+  assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+  for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+    // Is this index even compatible with the given class?
+    if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+      continue;
+    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+    // Early exit if we found a perfect match.
+    if (SubRegMask == LaneMask) {
+      BestIdx = Idx;
+      break;
+    }
+
+    // The index must not cover any lanes outside \p LaneMask.
+    if ((SubRegMask & ~LaneMask).any())
+      continue;
+
+    unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+    PossibleIndexes.push_back(Idx);
+    if (PopCount > BestCover) {
+      BestCover = PopCount;
+      BestIdx = Idx;
+    }
+  }
+
+  // Abort if we cannot possibly implement the COPY with the given indexes.
+  if (BestIdx == 0)
+    report_fatal_error("Impossible to implement partial COPY");
+
+  SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
+                                        BestIdx, DestLI, Late, SlotIndex());
+
+  // Greedy heuristic: Keep iterating, choosing the best covering subreg
+  // index each time.
+  LaneBitmask LanesLeft =
+      LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+  while (LanesLeft.any()) {
+    unsigned BestIdx = 0;
+    int BestCover = INT_MIN;
+    for (unsigned Idx : PossibleIndexes) {
+      LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+      // Early exit if we found a perfect match.
+      if (SubRegMask == LanesLeft) {
+        BestIdx = Idx;
+        break;
+      }
+
+      // Try to cover as many of the remaining lanes as possible while
+      // covering as few of the already covered lanes as possible.
+      int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+                - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+      if (Cover > BestCover) {
+        BestCover = Cover;
+        BestIdx = Idx;
+      }
+    }
+
+    if (BestIdx == 0)
+      report_fatal_error("Impossible to implement partial COPY");
+
+    buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+                          DestLI, Late, Def);
+    LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+  }
+
+  return Def;
+}
+
 VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
                                    VNInfo *ParentVNI,
                                    SlotIndex UseIdx,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I) {
-  MachineInstr *CopyMI = nullptr;
   SlotIndex Def;
   LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
 
@@ -505,45 +621,29 @@ VNInfo *SplitEditor::defFromParent(unsig
   LiveInterval &OrigLI = LIS.getInterval(Original);
   VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
 
+  unsigned Reg = LI->reg;
   bool DidRemat = false;
   if (OrigVNI) {
     LiveRangeEdit::Remat RM(ParentVNI);
     RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
     if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
-      Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+      Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
       ++NumRemats;
       DidRemat = true;
     }
   }
   if (!DidRemat) {
-    // Can't remat, just insert a copy from parent.
-    CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
-               .addReg(Edit->getReg());
-    Def = LIS.getSlotIndexes()
-              ->insertMachineInstrInMaps(*CopyMI, Late)
-              .getRegSlot();
+    LaneBitmask LaneMask;
     if (LI->hasSubRanges()) {
-      LaneBitmask LM = LaneBitmask::getNone();
+      LaneMask = LaneBitmask::getNone();
       for (LiveInterval::SubRange &S : LI->subranges())
-        LM |= S.LaneMask;
-
-      if (MRI.getMaxLaneMaskForVReg(LI->reg) != LM) {
-        // Find subreg for the lane mask.
-        unsigned SubIdx = 0;
-        for (unsigned I = 1, E = TRI.getNumSubRegIndices(); I < E; ++I) {
-          if (TRI.getSubRegIndexLaneMask(I) == LM) {
-            SubIdx = I;
-            break;
-          }
-        }
-        if (SubIdx == 0)
-          report_fatal_error("Cannot find subreg index to cover all alive lanes");
-        CopyMI->getOperand(0).setSubReg(SubIdx);
-        CopyMI->getOperand(1).setSubReg(SubIdx);
-        CopyMI->getOperand(0).setIsUndef(true);
-      }
+        LaneMask |= S.LaneMask;
+    } else {
+      LaneMask = LaneBitmask::getAll();
     }
+
     ++NumCopies;
+    Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
   }
 
   // Define the value in Reg.
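
A note on the register states chosen in buildSingleSubRegCopy above: the
helper below is just that flag computation pulled out and annotated.
partialCopyDefFlags itself is illustrative and not part of the patch; the
calls it makes are the real helpers from MachineInstrBuilder.h.

#include "llvm/CodeGen/MachineInstrBuilder.h"

// The first copy of a sequence starts the live range of ToReg, so the lanes
// it does not write are explicitly undef. Every later copy is bundled with
// its predecessor and its def is marked internal-read: a subregister def
// implicitly reads the other lanes, and inside the bundle those lanes were
// produced by the earlier copies.
static unsigned partialCopyDefFlags(bool FirstCopy) {
  return llvm::RegState::Define |
         llvm::getUndefRegState(FirstCopy) |
         llvm::getInternalReadRegState(!FirstCopy);
}

Bundling the later copies with their predecessor keeps the whole sequence at
a single SlotIndex, so LiveIntervals continues to see one def point; the
bundle is taken apart again in VirtRegMap.cpp below.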

Modified: llvm/trunk/lib/CodeGen/SplitKit.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SplitKit.h?rev=298025&r1=298024&r2=298025&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SplitKit.h (original)
+++ llvm/trunk/lib/CodeGen/SplitKit.h Thu Mar 16 19:41:39 2017
@@ -405,6 +405,21 @@ private:
   /// deleteRematVictims - Delete defs that are dead after rematerializing.
   void deleteRematVictims();
 
+  /// Add a copy instruction copying \p FromReg to \p ToReg before
+  /// \p InsertBefore. This can be invoked with a \p LaneMask that may require
+  /// constructing a sequence of copies to cover it exactly.
+  SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+      bool Late, unsigned RegIdx);
+
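+  /// Insert a single COPY for subregister index \p SubIdx and update the
+  /// live interval \p DestLI accordingly. Copies after the first one of a
+  /// sequence (indicated by a valid \p PrevCopy) are bundled with their
+  /// predecessor.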
+  SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+      unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.

Modified: llvm/trunk/lib/CodeGen/VirtRegMap.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegMap.cpp?rev=298025&r1=298024&r2=298025&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/VirtRegMap.cpp (original)
+++ llvm/trunk/lib/CodeGen/VirtRegMap.cpp Thu Mar 16 19:41:39 2017
@@ -167,6 +167,7 @@ class VirtRegRewriter : public MachineFu
   bool readsUndefSubreg(const MachineOperand &MO) const;
   void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
   void handleIdentityCopy(MachineInstr &MI) const;
+  void expandCopyBundle(MachineInstr &MI) const;
 
 public:
   static char ID;
@@ -372,6 +373,34 @@ void VirtRegRewriter::handleIdentityCopy
   DEBUG(dbgs() << "  deleted.\n");
 }
 
+/// The live range splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy
+/// instructions after processing the last one in the bundle. This does not
+/// update LiveIntervals, which we no longer need for these instructions.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+  if (!MI.isCopy())
+    return;
+
+  if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+    // Only do this when the complete bundle is made out of COPYs.
+    for (MachineBasicBlock::reverse_instr_iterator I =
+         std::next(MI.getReverseIterator()); I->isBundledWithSucc(); ++I) {
+      if (!I->isCopy())
+        return;
+    }
+
+    for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
+         I->isBundledWithPred(); ) {
+      MachineInstr &BundledMI = *I;
+      ++I;
+
+      BundledMI.unbundleFromPred();
+      if (Indexes)
+        Indexes->insertMachineInstrInMaps(BundledMI);
+    }
+  }
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<unsigned, 8> SuperDeads;
@@ -463,6 +492,8 @@ void VirtRegRewriter::rewrite() {
 
       DEBUG(dbgs() << "> " << *MI);
 
+      expandCopyBundle(*MI);
+
       // We can remove identity copies right now.
       handleIdentityCopy(*MI);
     }
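
To see the traversal in expandCopyBundle in isolation, here is a hedged
standalone model of it: Inst stands in for MachineInstr with only the bundle
flags, std::list for the instruction list, and the SlotIndexes update is
reduced to a comment. None of these names are LLVM API.

#include <iterator>
#include <list>

struct Inst {
  bool IsCopy = false;
  bool BundledWithPred = false; // glued to the previous instruction
  bool BundledWithSucc = false; // glued to the next instruction
};

// Model of VirtRegRewriter::expandCopyBundle. It only acts when MI is the
// last instruction of a bundle, verifies that every member of the bundle is
// a COPY, and then unbundles back to front. The bundle header keeps the
// bundle's existing slot index; each instruction split off it gets a fresh
// index in the real code.
void expandCopyBundle(std::list<Inst>::iterator MI) {
  if (!MI->IsCopy)
    return;
  if (!MI->BundledWithPred || MI->BundledWithSucc)
    return; // not the final instruction of a bundle

  // Walk backwards through the bundle; bail out unless every member,
  // including the header, is a copy.
  for (auto I = std::prev(MI);; --I) {
    if (!I->IsCopy)
      return;
    if (!I->BundledWithPred)
      break; // reached the bundle header
  }

  // Unbundle from the last member towards the header; clearing the flag
  // pair models MachineInstr::unbundleFromPred.
  for (auto I = MI; I->BundledWithPred;) {
    Inst &Cur = *I;
    --I; // step to the predecessor before editing flags
    Cur.BundledWithPred = false;
    I->BundledWithSucc = false;
    // Real code: if (Indexes) Indexes->insertMachineInstrInMaps(Cur);
  }
}

As the hunk above shows, this runs from rewrite() right before identity
copies are removed, at which point LiveIntervals is no longer needed for
these instructions.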

Added: llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir?rev=298025&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/splitkit.mir Thu Mar 16 19:41:39 2017
@@ -0,0 +1,64 @@
+# RUN: llc -o - %s -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s
+--- |
+  define void @func0() #0 { ret void }
+  define void @func1() #0 { ret void }
+
+  attributes #0 = { "amdgpu-num-sgpr"="12" }
+...
+---
+# Make sure we only get a single spill+reload even if live range splitting
+# created a sequence of multiple copy instructions.
+# CHECK-LABEL: name: func0
+# CHECK: SI_SPILL_S128_SAVE
+# CHECK-NOT: SI_SPILL_S128_SAVE
+# CHECK: S_NOP 0
+# CHECK: SI_SPILL_S128_RESTORE
+# CHECK-NOT: SI_SPILL_S128_RESTORE
+name: func0
+body: |
+  bb.0:
+    S_NOP 0, implicit-def undef %0.sub0 : sreg_128
+    S_NOP 0, implicit-def %0.sub3 : sreg_128
+
+    ; Clobber registers
+    S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1, implicit-def dead %sgpr2, implicit-def dead %sgpr3, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11
+
+    S_NOP 0, implicit %0.sub0
+    S_NOP 0, implicit %0.sub3
+    S_NOP 0, implicit %0.sub0
+    S_NOP 0, implicit %0.sub3
+...
+---
+# Live range splitting should split this into 2 intervals, with the second
+# getting allocated to sgpr0_sgpr1 and the first to something else, so we see
+# two copies in between for the two subregisters that are live.
+# CHECK-LABEL: name: func1
+# CHECK: [[REG0:%sgpr[0-9]+]] = COPY %sgpr0
+# CHECK: [[REG1:%sgpr[0-9]+]] = COPY %sgpr2
+# CHECK: S_NOP 0
+# CHECK: S_NOP 0, implicit [[REG0]]
+# CHECK: S_NOP 0, implicit [[REG1]]
+# CHECK: %sgpr0 = COPY [[REG0]]
+# CHECK: %sgpr2 = COPY [[REG1]]
+# CHECK: S_NOP
+# CHECK: S_NOP 0, implicit %sgpr0
+# CHECK: S_NOP 0, implicit %sgpr2
+name: func1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: %sgpr0, %sgpr1, %sgpr2
+    undef %0.sub0 : sreg_128 = COPY %sgpr0
+    %0.sub2 = COPY %sgpr2
+
+    S_NOP 0, implicit-def dead %sgpr0, implicit-def dead %sgpr1
+
+    S_NOP 0, implicit %0.sub0
+    S_NOP 0, implicit %0.sub2
+
+    ; Clobber everything but sgpr0-sgpr3
+    S_NOP 0, implicit-def dead %sgpr4, implicit-def dead %sgpr5, implicit-def dead %sgpr6, implicit-def dead %sgpr7, implicit-def dead %sgpr8, implicit-def dead %sgpr9, implicit-def dead %sgpr10, implicit-def dead %sgpr11, implicit-def dead %sgpr12, implicit-def dead %sgpr13, implicit-def dead %sgpr14, implicit-def dead %sgpr15, implicit-def dead %vcc_lo, implicit-def dead %vcc_hi
+
+    S_NOP 0, implicit %0.sub0
+    S_NOP 0, implicit %0.sub2
+...
