[llvm] r232262 - [MachineLICM] First steps of sinking GEPs near calls.

Daniel Jasper djasper at google.com
Sat Mar 14 03:58:39 PDT 2015


Author: djasper
Date: Sat Mar 14 05:58:38 2015
New Revision: 232262

URL: http://llvm.org/viewvc/llvm-project?rev=232262&view=rev
Log:
[MachineLICM] First steps of sinking GEPs near calls.

Specifically, if there are copy-like instructions in the loop preheader,
they are moved into the loop close to their uses. This reduces the live
intervals of the values and can avoid register spills; a reduced sketch of
the targeted pattern is shown below the notes.

This is working towards a fix for http://llvm.org/PR22230.
Review: http://reviews.llvm.org/D7259

Next steps:
- Find a better cost model (which non-copy instructions should be sunk?)
- Make this dependent on register pressure
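
To make the shape of the problem concrete, here is a reduced, hypothetical
sketch distilled from the test case added below (function and type names are
invented). Each field address is computed once outside the loop, but every
address is needed by only one call inside it, so keeping them all live across
the calls drives up register pressure. Note that the sinking itself operates
on the copy-like machine instructions produced for these GEPs after
instruction selection, not on the IR:

  %pair = type { i32, i32 }

  define void @sketch(%pair* %p, i32 %n) {
  entry:
    ; Both addresses are loop-invariant and end up outside the loop.
    %x = getelementptr inbounds %pair, %pair* %p, i64 0, i32 0
    %y = getelementptr inbounds %pair, %pair* %p, i64 0, i32 1
    br label %loop

  loop:
    %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
    %odd = and i32 %i, 1
    %cmp = icmp eq i32 %odd, 0
    br i1 %cmp, label %even, label %odd.bb

  even:                                   ; %x is only needed here
    tail call void @use(i32* %x)
    br label %latch

  odd.bb:                                 ; %y is only needed here
    tail call void @use(i32* %y)
    br label %latch

  latch:
    %i.next = add i32 %i, 1
    %done = icmp eq i32 %i.next, %n
    br i1 %done, label %exit, label %loop

  exit:
    ret void
  }

  declare void @use(i32*)

With -sink-insts-to-avoid-spills, copy-like machine instructions such as the
ones materializing %x and %y become candidates to be sunk from the preheader
into the blocks containing their uses. In this two-field sketch nothing would
actually spill, but with enough such addresses (the test below uses six) the
values would otherwise have to be spilled around the calls they do not feed.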

Added:
    llvm/trunk/test/CodeGen/X86/sink-cheap-instructions.ll
Modified:
    llvm/trunk/lib/CodeGen/MachineLICM.cpp

Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=232262&r1=232261&r2=232262&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Sat Mar 14 05:58:38 2015
@@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
                 cl::desc("MachineLICM should hoist even cheap instructions"),
                 cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+                       cl::desc("MachineLICM should sink instructions into "
+                                "loops to avoid register spills"),
+                       cl::init(false), cl::Hidden);
+
 STATISTIC(NumHoisted,
           "Number of machine instructions hoisted out of loops");
 STATISTIC(NumLowRP,
@@ -243,6 +249,11 @@ namespace {
     void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
     void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
 
+    /// SinkIntoLoop - Sink instructions into loops if profitable. This
+    /// especially tries to prevent register spills caused by register pressure
+    /// if there is little to no overhead in moving instructions into loops.
+    void SinkIntoLoop();
+
     /// getRegisterClassIDAndCost - For a given MI, register, and the operand
     /// index, return the ID and cost of its representative register class by
     /// reference.
@@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(M
       FirstInLoop = true;
       HoistOutOfLoop(N);
       CSEMap.clear();
+
+      if (SinkInstsToAvoidSpills)
+        SinkIntoLoop();
     }
   }
 
@@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(Machine
   }
 }
 
+void MachineLICM::SinkIntoLoop() {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  SmallVector<MachineInstr *, 8> Candidates;
+  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+       I != Preheader->instr_end(); ++I) {
+    // We need to ensure that we can safely move this instruction into the loop.
+    // As such, it must not have side effects, e.g. those a call would have.
+    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+      Candidates.push_back(I);
+  }
+
+  for (MachineInstr *I : Candidates) {
+    const MachineOperand &MO = I->getOperand(0);
+    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
+      continue;
+    if (!MRI->hasOneDef(MO.getReg()))
+      continue;
+    bool CanSink = true;
+    MachineBasicBlock *B = nullptr;
+    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+      // FIXME: Come up with a proper cost model that estimates whether sinking
+      // the instruction (and thus possibly executing it on every loop
+      // iteration) is more expensive than a register.
+      // For now, assume that copies are cheap and thus almost always worth it.
+      if (!MI.isCopy()) {
+        CanSink = false;
+        break;
+      }
+      if (!B) {
+        B = MI.getParent();
+        continue;
+      }
+      B = DT->findNearestCommonDominator(B, MI.getParent());
+      if (!B) {
+        CanSink = false;
+        break;
+      }
+    }
+    if (!CanSink || !B || B == Preheader)
+      continue;
+    B->splice(B->getFirstNonPHI(), Preheader, I);
+  }
+}
+
 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
   return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
 }

Added: llvm/trunk/test/CodeGen/X86/sink-cheap-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sink-cheap-instructions.ll?rev=232262&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sink-cheap-instructions.ll (added)
+++ llvm/trunk/test/CodeGen/X86/sink-cheap-instructions.ll Sat Mar 14 05:58:38 2015
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK
+
+; Ensure that we sink copy-like instructions into loops to avoid register
+; spills.
+
+; CHECK: Spill
+; SINK-NOT: Spill
+
+%struct.A = type { i32, i32, i32, i32, i32, i32 }
+
+define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) {
+  %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+  %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
+  %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
+  %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
+  %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
+  %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
+  br label %.backedge
+
+.backedge:
+  %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
+  %7 = getelementptr inbounds i8, i8* %.0, i64 1
+  %8 = load i8, i8* %7, align 1
+  switch i8 %8, label %.backedge.backedge [
+    i8 0, label %9
+    i8 10, label %10
+    i8 20, label %11
+    i8 30, label %12
+    i8 40, label %13
+    i8 50, label %14
+  ]
+
+; <label>:9
+  tail call void @_Z6assignPj(i32* %1)
+  br label %.backedge.backedge
+
+; <label>:10
+  tail call void @_Z6assignPj(i32* %2)
+  br label %.backedge.backedge
+
+.backedge.backedge:
+  br label %.backedge
+
+; <label>:11
+  tail call void @_Z6assignPj(i32* %3)
+  br label %.backedge.backedge
+
+; <label>:12
+  tail call void @_Z6assignPj(i32* %4)
+  br label %.backedge.backedge
+
+; <label>:13
+  tail call void @_Z6assignPj(i32* %5)
+  br label %.backedge.backedge
+
+; <label>:14
+  tail call void @_Z6assignPj(i32* %6)
+  br label %.backedge.backedge
+}
+
+declare void @_Z6assignPj(i32*)
