[llvm-commits] [llvm] r113102 - in /llvm/trunk: include/llvm/CodeGen/FastISel.h include/llvm/CodeGen/SelectionDAGISel.h lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp lib/Target/X86/X86FastISel.cpp lib/Target/X86/X86InstrBuilder.h lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/fast-isel-mem.ll

Chris Lattner sabre at nondot.org
Sat Sep 4 19:18:34 PDT 2010


Author: lattner
Date: Sat Sep  4 21:18:34 2010
New Revision: 113102

URL: http://llvm.org/viewvc/llvm-project?rev=113102&view=rev
Log:
implement rdar://6653118 - fastisel should fold loads where possible.

Since mem2reg isn't run at -O0, we get a ton of reloads from the stack.
For example, before this change, this code:

int foo(int x, int y, int z) {
  return x+y+z;
}

used to compile into:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	movl	4(%rsp), %esi
	addl	%edx, %esi
	movl	(%rsp), %edx
	addl	%esi, %edx
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Now we produce:

_foo:                                   ## @foo
	subq	$12, %rsp
	movl	%edi, 8(%rsp)
	movl	%esi, 4(%rsp)
	movl	%edx, (%rsp)
	movl	8(%rsp), %edx
	addl	4(%rsp), %edx    ## Folded load
	addl	(%rsp), %edx     ## Folded load
	movl	%edx, %eax
	addq	$12, %rsp
	ret

Fewer instructions and less register use = faster compiles.


Modified:
    llvm/trunk/include/llvm/CodeGen/FastISel.h
    llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
    llvm/trunk/lib/Target/X86/X86FastISel.cpp
    llvm/trunk/lib/Target/X86/X86InstrBuilder.h
    llvm/trunk/lib/Target/X86/X86InstrInfo.h
    llvm/trunk/test/CodeGen/X86/fast-isel-mem.ll

Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/FastISel.h (original)
+++ llvm/trunk/include/llvm/CodeGen/FastISel.h Sat Sep  4 21:18:34 2010
@@ -39,6 +39,7 @@
 class TargetMachine;
 class TargetRegisterClass;
 class TargetRegisterInfo;
+class LoadInst;
 
 /// FastISel - This is a fast-path instruction selection class that
 /// generates poor code and doesn't support illegal types or non-trivial
@@ -102,7 +103,16 @@
   /// index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
-  /// recomputeInsertPt - Reset InsertPt to prepare for insterting instructions
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                             const LoadInst * /*LI*/) {
+    return false;
+  }
+  
+  /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions
   /// into the current block.
   void recomputeInsertPt();
 

Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h Sat Sep  4 21:18:34 2010
@@ -34,6 +34,7 @@
   class ScheduleHazardRecognizer;
   class GCFunctionInfo;
   class ScheduleDAGSDNodes;
+  class LoadInst;
  
 /// SelectionDAGISel - This is the common base class used for SelectionDAG-based
 /// pattern-matching instruction selectors.
@@ -282,6 +283,7 @@
   
   void PrepareEHLandingPad();
   void SelectAllBasicBlocks(const Function &Fn);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
   void FinishBasicBlock();
 
   void SelectBasicBlock(BasicBlock::const_iterator Begin,

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Sat Sep  4 21:18:34 2010
@@ -661,6 +661,43 @@
   }
 }
 
+
+
+  
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             FastISel *FastIS) {
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile()) return false;
+  
+  // Figure out which vreg this is going into.
+  unsigned LoadReg = FastIS->getRegForValue(LI);
+  assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+  // Check to see what the uses of this vreg are.  If it has no uses, or more
+  // than one use (at the machine instr level) then we can't fold it.
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+  if (RI == RegInfo->reg_end())
+    return false;
+  
+  // See if there is exactly one use of the vreg.  If there are multiple uses,
+  // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result, in
+  // either case, we can't fold this.
+  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+  if (PostRI != RegInfo->reg_end())
+    return false;
+  
+  assert(RI.getOperand().isUse() &&
+         "The only use of the vreg must be a use, we haven't emitted the def!");
+
+  // Ask the target to try folding the load.
+  return FastIS->TryToFoldLoad(&*RI, RI.getOperandNo(), LI);
+}
+
+  
+
+
 void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = 0;
@@ -723,8 +760,21 @@
         FastIS->recomputeInsertPt();
 
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(Inst))
+        if (FastIS->SelectInstruction(Inst)) {
+          // If fast isel succeeded, check to see if there is a single-use
+          // non-volatile load right before the selected instruction, and see if
+          // the load is used by the instruction.  If so, try to fold it.
+          const Instruction *BeforeInst = 0;
+          if (Inst != Begin)
+            BeforeInst = llvm::prior(llvm::prior(BI));
+          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) {
+            // If we succeeded, don't re-select the load.
+            --BI;
+          }          
           continue;
+        }
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(Inst)) {

Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Sat Sep  4 21:18:34 2010
@@ -63,6 +63,13 @@
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                             const LoadInst *LI);
+  
 #include "X86GenFastISel.inc"
 
 private:
@@ -1941,6 +1948,34 @@
   return ResultReg;
 }
 
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction.  If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// possible.
+bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+                                const LoadInst *LI) {
+  X86AddressMode AM;
+  if (!X86SelectAddress(LI->getOperand(0), AM))
+    return false;
+  
+  X86InstrInfo &XII = (X86InstrInfo&)TII;
+  
+  unsigned Size = TD.getTypeAllocSize(LI->getType());
+  unsigned Alignment = LI->getAlignment();
+
+  SmallVector<MachineOperand, 8> AddrOps;
+  AM.getFullAddress(AddrOps);
+  
+  MachineInstr *Result =
+    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
+  if (Result == 0) return false;
+  
+  MI->getParent()->insert(MI, Result);
+  MI->eraseFromParent();
+  return true;
+}
+
+
 namespace llvm {
   llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
     return new X86FastISel(funcInfo);

Modified: llvm/trunk/lib/Target/X86/X86InstrBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrBuilder.h?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrBuilder.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrBuilder.h Sat Sep  4 21:18:34 2010
@@ -56,6 +56,31 @@
     : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
+  
+  
+  void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
+    assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
+    
+    if (BaseType == X86AddressMode::RegBase)
+      MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+                                             false, false, false, 0, false));
+    else {
+      assert(BaseType == X86AddressMode::FrameIndexBase);
+      MO.push_back(MachineOperand::CreateFI(Base.FrameIndex));
+    }
+    
+    MO.push_back(MachineOperand::CreateImm(Scale));
+    MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
+                                           false, false, false, 0, false));
+    
+    if (GV)
+      MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
+    else
+      MO.push_back(MachineOperand::CreateImm(Disp));
+    
+    MO.push_back(MachineOperand::CreateReg(0, false, false,
+                                           false, false, false, 0, false));
+  }
 };
 
 /// addDirectMem - This function is used to add a direct memory reference to the
@@ -101,10 +126,11 @@
   
   if (AM.BaseType == X86AddressMode::RegBase)
     MIB.addReg(AM.Base.Reg);
-  else if (AM.BaseType == X86AddressMode::FrameIndexBase)
+  else {
+    assert(AM.BaseType == X86AddressMode::FrameIndexBase);
     MIB.addFrameIndex(AM.Base.FrameIndex);
-  else
-    assert (0);
+  }
+
   MIB.addImm(AM.Scale).addReg(AM.IndexReg);
   if (AM.GV)
     MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Sat Sep  4 21:18:34 2010
@@ -845,18 +845,18 @@
   /// SetSSEDomain - Set the SSEDomain of MI.
   void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
 
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      unsigned OpNum,
+                                      const SmallVectorImpl<MachineOperand> &MOs,
+                                      unsigned Size, unsigned Alignment) const;
+  
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
-  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                     MachineInstr* MI,
-                                     unsigned OpNum,
-                                     const SmallVectorImpl<MachineOperand> &MOs,
-                                     unsigned Size, unsigned Alignment) const;
-
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-mem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-mem.ll?rev=113102&r1=113101&r2=113102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-mem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-mem.ll Sat Sep  4 21:18:34 2010
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN:   grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN:   grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
 
 @src = external global i32
 
+; rdar://6653118
 define i32 @loadgv() nounwind {
 entry:
 	%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@
         %2 = add i32 %0, %1
         store i32 %2, i32* @src
 	ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: 	movl	L_src$non_lazy_ptr, %ecx
+; CHECK: 	movl	(%ecx), %eax
+; CHECK: 	addl	(%ecx), %eax
+; CHECK: 	movl	%eax, (%ecx)
+; CHECK: 	ret
+
 }
 
 %stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@
 entry:
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
 	ret void
+; CHECK: _t:
+; CHECK:	movl	$0, %eax
+; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
+
 }





More information about the llvm-commits mailing list