[llvm] r320814 - [InlineCost] Find repeated loads in the callee

Haicheng Wu via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 06:34:41 PST 2017


Author: haicheng
Date: Fri Dec 15 06:34:41 2017
New Revision: 320814

URL: http://llvm.org/viewvc/llvm-project?rev=320814&view=rev
Log:
[InlineCost] Find repeated loads in the callee

The SROA analysis in InlineCost can figure out that some stores can be
removed after inlining, and that the repeated loads these stores would
otherwise clobber then become free as well.  This patch finds such loads
and adjusts the inline cost accordingly.
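
As a minimal sketch of the credited pattern (illustrative only, not part of
this commit; the names are made up):

  define void @callee(i32* %a, i32* %b) {
    %v1 = load i32, i32* %a
    store i32 %v1, i32* %b  ; removable by SROA if the caller passes an
                            ; alloca for %b, so it clobbers nothing
    %v2 = load i32, i32* %a ; repeated load of %a: treated as free
    ret void
  }

When inlining lets SROA delete the store, the second load of %a is expected
to become redundant as well, so it is not charged to the inline cost of the
call site.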

Differential Revision: https://reviews.llvm.org/D33946

Added:
    llvm/trunk/test/Transforms/Inline/redundant-loads.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ValueTracking.h
    llvm/trunk/lib/Analysis/InlineCost.cpp
    llvm/trunk/lib/Analysis/ValueTracking.cpp

Modified: llvm/trunk/include/llvm/Analysis/ValueTracking.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ValueTracking.h?rev=320814&r1=320813&r2=320814&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ValueTracking.h (original)
+++ llvm/trunk/include/llvm/Analysis/ValueTracking.h Fri Dec 15 06:34:41 2017
@@ -366,6 +366,10 @@ class Value;
   /// operands are not memory dependent.
   bool mayBeMemoryDependent(const Instruction &I);
 
+  /// Return true if the instruction I is an intrinsic that cannot be
+  /// speculated but also cannot trap.
+  bool isAssumeLikeIntrinsic(const Instruction *I);
+
   /// Return true if it is valid to use the assumptions provided by an
   /// assume intrinsic, I, at the point in the control-flow identified by the
   /// context instruction, CxtI.

Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=320814&r1=320813&r2=320814&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Fri Dec 15 06:34:41 2017
@@ -25,6 +25,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
@@ -171,6 +172,13 @@ class CallAnalyzer : public InstVisitor<
   /// constant arguments.
   DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;
 
+  /// Model the elimination of repeated loads that is expected to happen
+  /// whenever we simplify away the stores that would otherwise clobber them
+  /// after inlining.
+  bool EnableLoadElimination;
+  SmallPtrSet<Value *, 16> LoadAddrSet;
+  int LoadEliminationCost;
+
   // Custom simplification helper routines.
   bool isAllocaDerivedArg(Value *V);
   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
@@ -180,6 +188,7 @@ class CallAnalyzer : public InstVisitor<
   void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
                           int InstructionCost);
+  void disableLoadElimination();
   bool isGEPFree(GetElementPtrInst &GEP);
   bool canFoldInboundsGEP(GetElementPtrInst &I);
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
@@ -275,10 +284,10 @@ public:
         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
         NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
-        NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
-        NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
-        NumInstructionsSimplified(0), SROACostSavings(0),
-        SROACostSavingsLost(0) {}
+        EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0),
+        NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+        NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+        SROACostSavings(0), SROACostSavingsLost(0) {}
 
   bool analyzeCall(CallSite CS);
 
@@ -333,6 +342,7 @@ void CallAnalyzer::disableSROA(DenseMap<
   SROACostSavings -= CostIt->second;
   SROACostSavingsLost += CostIt->second;
   SROAArgCosts.erase(CostIt);
+  disableLoadElimination();
 }
 
 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
@@ -350,6 +360,13 @@ void CallAnalyzer::accumulateSROACost(De
   SROACostSavings += InstructionCost;
 }
 
+void CallAnalyzer::disableLoadElimination() {
+  if (EnableLoadElimination) {
+    Cost += LoadEliminationCost;
+    EnableLoadElimination = false;
+  }
+}
+
 /// \brief Accumulate a constant GEP offset into an APInt if possible.
 ///
 /// Returns false if unable to compute the offset for any reason. Respects any
@@ -1076,6 +1093,15 @@ bool CallAnalyzer::visitLoad(LoadInst &I
     disableSROA(CostIt);
   }
 
+  // If the data is already loaded from this address and hasn't been clobbered
+  // by any stores or calls, this load is likely to be redundant and can be
+  // eliminated.
+  if (EnableLoadElimination &&
+      !LoadAddrSet.insert(I.getPointerOperand()).second) {
+    LoadEliminationCost += InlineConstants::InstrCost;
+    return true;
+  }
+
   return false;
 }
 
@@ -1091,6 +1117,15 @@ bool CallAnalyzer::visitStore(StoreInst
     disableSROA(CostIt);
   }
 
+  // The store can potentially clobber loads and prevent repeated loads from
+  // being eliminated.
+  // FIXME:
+  // 1. We can probably keep an initial set of eliminable loads subtracted
+  // from the cost even when we finally see a store. We just need to disable
+  // *further* accumulation of elimination savings.
+  // 2. We should probably at some point thread MemorySSA for the callee into
+  // this and then use that to actually compute *really* precise savings.
+  disableLoadElimination();
   return false;
 }
 
@@ -1173,6 +1208,8 @@ bool CallAnalyzer::visitCallSite(CallSit
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
       switch (II->getIntrinsicID()) {
       default:
+        if (!CS.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+          disableLoadElimination();
         return Base::visitCallSite(CS);
 
       case Intrinsic::load_relative:
@@ -1183,6 +1220,7 @@ bool CallAnalyzer::visitCallSite(CallSit
       case Intrinsic::memset:
       case Intrinsic::memcpy:
       case Intrinsic::memmove:
+        disableLoadElimination();
         // SROA can usually chew through these intrinsics, but they aren't free.
         return false;
       case Intrinsic::localescape:
@@ -1209,6 +1247,8 @@ bool CallAnalyzer::visitCallSite(CallSit
         Cost += InlineConstants::CallPenalty;
     }
 
+    if (!CS.onlyReadsMemory())
+      disableLoadElimination();
     return Base::visitCallSite(CS);
   }
 
@@ -1223,8 +1263,11 @@ bool CallAnalyzer::visitCallSite(CallSit
   // Next, check if this happens to be an indirect function call to a known
   // function in this inline context. If not, we've done all we can.
   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-  if (!F)
+  if (!F) {
+    if (!CS.onlyReadsMemory())
+      disableLoadElimination();
     return Base::visitCallSite(CS);
+  }
 
   // If we have a constant that we are calling as a function, we can peer
   // through it and see the function target. This happens not infrequently
@@ -1241,6 +1284,8 @@ bool CallAnalyzer::visitCallSite(CallSit
     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
   }
 
+  if (!F->onlyReadsMemory())
+    disableLoadElimination();
   return Base::visitCallSite(CS);
 }
 
@@ -1843,6 +1888,7 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump
   DEBUG_PRINT_STAT(NumInstructions);
   DEBUG_PRINT_STAT(SROACostSavings);
   DEBUG_PRINT_STAT(SROACostSavingsLost);
+  DEBUG_PRINT_STAT(LoadEliminationCost);
   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
   DEBUG_PRINT_STAT(Cost);
   DEBUG_PRINT_STAT(Threshold);

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=320814&r1=320813&r2=320814&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Fri Dec 15 06:34:41 2017
@@ -483,7 +483,7 @@ static bool isEphemeralValueOf(const Ins
 }
 
 // Is this an intrinsic that cannot be speculated but also cannot trap?
-static bool isAssumeLikeIntrinsic(const Instruction *I) {
+bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
   if (const CallInst *CI = dyn_cast<CallInst>(I))
     if (Function *F = CI->getCalledFunction())
       switch (F->getIntrinsicID()) {

Added: llvm/trunk/test/Transforms/Inline/redundant-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/redundant-loads.ll?rev=320814&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/redundant-loads.ll (added)
+++ llvm/trunk/test/Transforms/Inline/redundant-loads.ll Fri Dec 15 06:34:41 2017
@@ -0,0 +1,186 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=3  | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @pad() readnone
+
+define void @outer1(i32* %a) {
+; CHECK-LABEL: @outer1(
+; CHECK-NOT: call void @inner1
+  %b = alloca i32
+  call void @inner1(i32* %a, i32* %b)
+  ret void
+}
+
+define void @inner1(i32* %a, i32* %b) {
+  %1 = load i32, i32* %a
+  store i32 %1, i32* %b ; This store does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  ret void
+}
+
+
+define void @outer2(i32* %a, i32* %b) {
+; CHECK-LABEL: @outer2(
+; CHECK: call void @inner2
+  call void @inner2(i32* %a, i32* %b)
+  ret void
+}
+
+define void @inner2(i32* %a, i32* %b) {
+  %1 = load i32, i32* %a
+  store i32 %1, i32* %b ; This store clobbers the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  ret void
+}
+
+
+define void @outer3(i32* %a) {
+; CHECK-LABEL: @outer3(
+; CHECK: call void @inner3
+  call void @inner3(i32* %a)
+  ret void
+}
+
+declare void @ext()
+
+define void @inner3(i32* %a) {
+  %1 = load i32, i32* %a
+  call void @ext() ; This call clobbers the first load.
+  %2 = load i32, i32* %a
+  ret void
+}
+
+
+define void @outer4(i32* %a, i32* %b, i32* %c) {
+; CHECK-LABEL: @outer4(
+; CHECK-NOT: call void @inner4
+  call void @inner4(i32* %a, i32* %b, i1 false)
+  ret void
+}
+
+define void @inner4(i32* %a, i32* %b, i1 %pred) {
+  %1 = load i32, i32* %a
+  br i1 %pred, label %cond_true, label %cond_false
+
+cond_true:
+  store i32 %1, i32* %b ; This store does not clobber the first load.
+  br label %cond_false
+
+cond_false:
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  %4 = load i32, i32* %a
+  ret void
+}
+
+
+define void @outer5(i32* %a, double %b) {
+; CHECK-LABEL: @outer5(
+; CHECK-NOT: call void @inner5
+  call void @inner5(i32* %a, double %b)
+  ret void
+}
+
+declare double @llvm.fabs.f64(double) nounwind readnone
+
+define void @inner5(i32* %a, double %b) {
+  %1 = load i32, i32* %a
+  %2 = call double @llvm.fabs.f64(double %b) ; This intrinsic does not clobber the first load.
+  %3 = load i32, i32* %a
+  call void @pad()
+  ret void
+}
+
+define void @outer6(i32* %a, i8* %ptr) {
+; CHECK-LABEL: @outer6(
+; CHECK-NOT: call void @inner6
+  call void @inner6(i32* %a, i8* %ptr)
+  ret void
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) argmemonly nounwind
+
+define void @inner6(i32* %a, i8* %ptr) {
+  %1 = load i32, i32* %a
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %ptr) ; This intrinsic does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  %3 = load i32, i32* %a
+  ret void
+}
+
+define void @outer7(i32* %a) {
+; CHECK-LABEL: @outer7(
+; CHECK-NOT: call void @inner7
+  call void @inner7(i32* %a)
+  ret void
+}
+
+declare void @ext2() readnone
+
+define void @inner7(i32* %a) {
+  %1 = load i32, i32* %a
+  call void @ext2() ; This call does not clobber the first load.
+  %2 = load i32, i32* %a
+  ret void
+}
+
+
+define void @outer8(i32* %a) {
+; CHECK-LABEL: @outer8(
+; CHECK-NOT: call void @inner8
+  call void @inner8(i32* %a, void ()* @ext2)
+  ret void
+}
+
+define void @inner8(i32* %a, void ()* %f) {
+  %1 = load i32, i32* %a
+  call void %f() ; This indirect call does not clobber the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  ret void
+}
+
+
+define void @outer9(i32* %a) {
+; CHECK-LABEL: @outer9(
+; CHECK: call void @inner9
+  call void @inner9(i32* %a, void ()* @ext)
+  ret void
+}
+
+define void @inner9(i32* %a, void ()* %f) {
+  %1 = load i32, i32* %a
+  call void %f() ; This indirect call clobbers the first load.
+  %2 = load i32, i32* %a
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  call void @pad()
+  ret void
+}



