[llvm] r358422 - [PGO] Profile guided code size optimization.

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 15 09:49:01 PDT 2019


Author: yamauchi
Date: Mon Apr 15 09:49:00 2019
New Revision: 358422

URL: http://llvm.org/viewvc/llvm-project?rev=358422&view=rev
Log:
[PGO] Profile guided code size optimization.

Summary:
Enable some of the existing size optimizations for cold code under PGO.

A ~5% code size saving in big internal app under PGO.

The way it gets BFI/PSI is discussed in the RFC thread

http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html 

Note it doesn't currently touch loop passes.

Reviewers: davidxl, eraman

Reviewed By: eraman

Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59514

Added:
    llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h
    llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp
Modified:
    llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
    llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
    llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h
    llvm/trunk/lib/Passes/PassBuilder.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/CMakeLists.txt
    llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/trunk/test/Other/new-pm-defaults.ll
    llvm/trunk/test/Other/new-pm-lto-defaults.ll
    llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
    llvm/trunk/test/Other/opt-O2-pipeline.ll
    llvm/trunk/test/Other/opt-O3-pipeline.ll
    llvm/trunk/test/Other/opt-Os-pipeline.ll
    llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
    llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll
    llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll
    llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
    llvm/trunk/test/Transforms/LoopVectorize/optsize.ll

Modified: llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h Mon Apr 15 09:49:00 2019
@@ -55,6 +55,7 @@ class DominatorTree;
 class Function;
 class GlobalVariable;
 class Instruction;
+class ProfileSummaryInfo;
 class TargetTransformInfo;
 
 /// A private "module" namespace for types and utilities used by
@@ -124,7 +125,8 @@ public:
 
   // Glue for old PM.
   bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
-               BlockFrequencyInfo *BFI, BasicBlock &Entry);
+               BlockFrequencyInfo *BFI, BasicBlock &Entry,
+               ProfileSummaryInfo *PSI);
 
   void cleanup() {
     ClonedCastMap.clear();
@@ -148,6 +150,7 @@ private:
   LLVMContext *Ctx;
   const DataLayout *DL;
   BasicBlock *Entry;
+  ProfileSummaryInfo *PSI;
 
   /// Keeps track of constant candidates found in the function.
   using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;

Modified: llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h Mon Apr 15 09:49:00 2019
@@ -28,6 +28,8 @@ class TargetLibraryInfo;
 class BasicBlock;
 class Function;
 class OptimizationRemarkEmitter;
+class BlockFrequencyInfo;
+class ProfileSummaryInfo;
 
 /// This class implements simplifications for calls to fortified library
 /// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
@@ -74,6 +76,8 @@ private:
   const DataLayout &DL;
   const TargetLibraryInfo *TLI;
   OptimizationRemarkEmitter &ORE;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
   bool UnsafeFPShrink;
   function_ref<void(Instruction *, Value *)> Replacer;
   function_ref<void(Instruction *)> Eraser;
@@ -101,6 +105,7 @@ public:
   LibCallSimplifier(
       const DataLayout &DL, const TargetLibraryInfo *TLI,
       OptimizationRemarkEmitter &ORE,
+      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
       function_ref<void(Instruction *, Value *)> Replacer =
           &replaceAllUsesWithDefault,
       function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);

Added: llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h?rev=358422&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h (added)
+++ llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h Mon Apr 15 09:49:00 2019
@@ -0,0 +1,34 @@
+//===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+#define LLVM_TRANSFORMS_UTILS_SiZEOPTS_H
+
+namespace llvm {
+
+class BasicBlock;
+class BlockFrequencyInfo;
+class Function;
+class ProfileSummaryInfo;
+
+/// Returns true if function \p F is suggested to be size-optimized base on the
+/// profile.
+bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+                           BlockFrequencyInfo *BFI);
+/// Returns true if basic block \p BB is suggested to be size-optimized base
+/// on the profile.
+bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+                           BlockFrequencyInfo *BFI);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SiZEOPTS_H

Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Mon Apr 15 09:49:00 2019
@@ -24,11 +24,13 @@ namespace llvm {
 
 class AssumptionCache;
 class BasicBlock;
+class BlockFrequencyInfo;
 class DependenceInfo;
 class DominatorTree;
 class Loop;
 class LoopInfo;
 class MDNode;
+class ProfileSummaryInfo;
 class OptimizationRemarkEmitter;
 class ScalarEvolution;
 
@@ -120,7 +122,8 @@ void simplifyLoopAfterUnroll(Loop *L, bo
 MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
 
 TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
-    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
     Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);

Modified: llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h (original)
+++ llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h Mon Apr 15 09:49:00 2019
@@ -71,6 +71,7 @@ class Loop;
 class LoopAccessInfo;
 class LoopInfo;
 class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
 class ScalarEvolution;
 class TargetLibraryInfo;
 class TargetTransformInfo;
@@ -96,6 +97,7 @@ struct LoopVectorizePass : public PassIn
   AssumptionCache *AC;
   std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
   OptimizationRemarkEmitter *ORE;
+  ProfileSummaryInfo *PSI;
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 
@@ -105,7 +107,7 @@ struct LoopVectorizePass : public PassIn
                BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
                DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
                std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
-               OptimizationRemarkEmitter &ORE);
+               OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
 
   bool processLoop(Loop *L);
 };

Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
+++ llvm/trunk/lib/Passes/PassBuilder.cpp Mon Apr 15 09:49:00 2019
@@ -575,8 +575,12 @@ void PassBuilder::addPGOInstrPasses(Modu
     Options.DoCounterPromotion = true;
     Options.UseBFIInPromotion = IsCS;
     MPM.addPass(InstrProfiling(Options, IsCS));
-  } else if (!ProfileFile.empty())
+  } else if (!ProfileFile.empty()) {
     MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
 }
 
 static InlineParams
@@ -649,6 +653,9 @@ PassBuilder::buildModuleSimplificationPi
     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                         PGOOpt->ProfileRemappingFile,
                                         Phase == ThinLTOPhase::PreLink));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
     // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
     // for the profile annotation to be accurate in the ThinLTO backend.
     if (Phase != ThinLTOPhase::PreLink)
@@ -1065,6 +1072,9 @@ PassBuilder::buildLTODefaultPipeline(Opt
     MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
                                         PGOOpt->ProfileRemappingFile,
                                         false /* ThinLTOPhase::PreLink */));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
   }
 
   // Remove unused virtual tables to improve the quality of code generated by

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Apr 15 09:49:00 2019
@@ -4178,7 +4178,7 @@ Instruction *InstCombiner::tryOptimizeCa
   auto InstCombineErase = [this](Instruction *I) {
     eraseInstFromFunction(*I);
   };
-  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+  LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
                                InstCombineErase);
   if (Value *With = Simplifier.optimizeCall(CI)) {
     ++NumSimplified;

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h Mon Apr 15 09:49:00 2019
@@ -52,12 +52,14 @@ namespace llvm {
 
 class APInt;
 class AssumptionCache;
+class BlockFrequencyInfo;
 class DataLayout;
 class DominatorTree;
 class GEPOperator;
 class GlobalVariable;
 class LoopInfo;
 class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
 class TargetLibraryInfo;
 class User;
 
@@ -304,6 +306,8 @@ private:
   const DataLayout &DL;
   const SimplifyQuery SQ;
   OptimizationRemarkEmitter &ORE;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
 
   // Optional analyses. When non-null, these can both be used to do better
   // combining and will be updated to reflect any changes.
@@ -315,11 +319,11 @@ public:
   InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
                bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-               OptimizationRemarkEmitter &ORE, const DataLayout &DL,
-               LoopInfo *LI)
+               OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+               ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
         ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
-        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+        DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
 
   /// Run the combiner over the entire worklist until it is empty.
   ///

Modified: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Mon Apr 15 09:49:00 2019
@@ -46,14 +46,17 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -3478,7 +3481,8 @@ static bool prepareICWorklistFromFunctio
 static bool combineInstructionsOverFunction(
     Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-    OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
     LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
   ExpensiveCombines |= EnableExpensiveCombines;
@@ -3509,7 +3513,7 @@ static bool combineInstructionsOverFunct
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
     InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
-                    AC, TLI, DT, ORE, DL, LI);
+                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
 
     if (!IC.run())
@@ -3529,8 +3533,15 @@ PreservedAnalyses InstCombinePass::run(F
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
 
   auto *AA = &AM.getResult<AAManager>(F);
+  const ModuleAnalysisManager &MAM =
+      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  ProfileSummaryInfo *PSI =
+      MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                       ExpensiveCombines, LI))
+                                       BFI, PSI, ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3554,6 +3565,8 @@ void InstructionCombiningPass::getAnalys
   AU.addPreserved<AAResultsWrapperPass>();
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
 }
 
 bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3570,9 +3583,15 @@ bool InstructionCombiningPass::runOnFunc
   // Optional analyses.
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  ProfileSummaryInfo *PSI =
+      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  BlockFrequencyInfo *BFI =
+      (PSI && PSI->hasProfileSummary()) ?
+      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+      nullptr;
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                         ExpensiveCombines, LI);
+                                         BFI, PSI, ExpensiveCombines, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3585,6 +3604,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTree
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                     "Combine redundant instructions", false, false)
 

Modified: llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp Mon Apr 15 09:49:00 2019
@@ -41,6 +41,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
@@ -60,6 +61,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -111,6 +113,7 @@ public:
     if (ConstHoistWithBlockFrequency)
       AU.addRequired<BlockFrequencyInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
 
@@ -126,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLe
                       "Constant Hoisting", false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
                     "Constant Hoisting", false, false)
@@ -148,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFu
                    ConstHoistWithBlockFrequency
                        ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
                        : nullptr,
-                   Fn.getEntryBlock());
+                   Fn.getEntryBlock(),
+                   &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
 
   if (MadeChange) {
     LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsI
                                            ConstCandVecType::iterator &MaxCostItr) {
   unsigned NumUses = 0;
 
-  if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) {
+  bool OptForSize = Entry->getParent()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+  if (!OptForSize || std::distance(S,E) > 100) {
     for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
       NumUses += ConstCand->Uses.size();
       if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
@@ -919,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCas
 /// Optimize expensive integer constants in the given function.
 bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
                                    DominatorTree &DT, BlockFrequencyInfo *BFI,
-                                   BasicBlock &Entry) {
+                                   BasicBlock &Entry, ProfileSummaryInfo *PSI) {
   this->TTI = &TTI;
   this->DT = &DT;
   this->BFI = BFI;
   this->DL = &Fn.getParent()->getDataLayout();
   this->Ctx = &Fn.getContext();
   this->Entry = &Entry;
+  this->PSI = PSI;
   // Collect all constant candidates.
   collectConstantCandidates(Fn);
 
@@ -962,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::
   auto BFI = ConstHoistWithBlockFrequency
                  ? &AM.getResult<BlockFrequencyAnalysis>(F)
                  : nullptr;
-  if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
+  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;

Modified: llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp Mon Apr 15 09:49:00 2019
@@ -29,11 +29,14 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -54,6 +57,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include <algorithm>
 #include <cassert>
 #include <forward_list>
@@ -159,8 +163,9 @@ namespace {
 class LoadEliminationForLoop {
 public:
   LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
-                         DominatorTree *DT)
-      : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
+                         DominatorTree *DT, BlockFrequencyInfo *BFI,
+                         ProfileSummaryInfo* PSI)
+      : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
 
   /// Look through the loop-carried and loop-independent dependences in
   /// this loop and find store->load dependences.
@@ -529,7 +534,11 @@ public:
     }
 
     if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
-      if (L->getHeader()->getParent()->hasOptSize()) {
+      auto *HeaderBB = L->getHeader();
+      auto *F = HeaderBB->getParent();
+      bool OptForSize = F->hasOptSize() ||
+                        llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+      if (OptForSize) {
         LLVM_DEBUG(
             dbgs() << "Versioning is needed but not allowed when optimizing "
                       "for size.\n");
@@ -572,6 +581,8 @@ private:
   LoopInfo *LI;
   const LoopAccessInfo &LAI;
   DominatorTree *DT;
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
   PredicatedScalarEvolution PSE;
 };
 
@@ -579,6 +590,7 @@ private:
 
 static bool
 eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+                          BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
                           function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
   // Build up a worklist of inner-loops to transform to avoid iterator
   // invalidation.
@@ -597,7 +609,7 @@ eliminateLoadsAcrossLoops(Function &F, L
   bool Changed = false;
   for (Loop *L : Worklist) {
     // The actual work is performed by LoadEliminationForLoop.
-    LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+    LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
     Changed |= LEL.processLoop();
   }
   return Changed;
@@ -622,10 +634,14 @@ public:
     auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+                &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+                nullptr;
 
     // Process each loop nest in the function.
     return eliminateLoadsAcrossLoops(
-        F, LI, DT,
+        F, LI, DT, BFI, PSI,
         [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
   }
 
@@ -638,6 +654,8 @@ public:
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
   }
 };
 
@@ -653,6 +671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLeg
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
 INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
 
 FunctionPass *llvm::createLoopLoadEliminationPass() {
@@ -668,13 +688,17 @@ PreservedAnalyses LoopLoadEliminationPas
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
   MemorySSA *MSSA = EnableMSSALoopDependency
                         ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
                         : nullptr;
 
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
   bool Changed = eliminateLoadsAcrossLoops(
-      F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
+      F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
         LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
         return LAM.getResult<LoopAccessAnalysis>(L, AR);
       });

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp Mon Apr 15 09:49:00 2019
@@ -294,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, Dominator
     return LoopUnrollResult::Unmodified;
 
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, SE, TTI, OptLevel, None, None, None, None, None, None);
+      L, SE, TTI, nullptr, nullptr, OptLevel,
+      None, None, None, None, None, None);
   if (AllowUnrollAndJam.getNumOccurrences() > 0)
     UP.UnrollAndJam = AllowUnrollAndJam;
   if (UnrollAndJamThreshold.getNumOccurrences() > 0)

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Mon Apr 15 09:49:00 2019
@@ -23,7 +23,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -55,6 +57,7 @@
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <algorithm>
 #include <cassert>
@@ -165,7 +168,8 @@ static const unsigned NoThreshold = std:
 /// Gather the various unrolling parameters based on the defaults, compiler
 /// flags, TTI overrides and user specified parameters.
 TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
-    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+    Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
     Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
     Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
     Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
@@ -198,7 +202,9 @@ TargetTransformInfo::UnrollingPreference
   TTI.getUnrollingPreferences(L, SE, UP);
 
   // Apply size attributes
-  if (L->getHeader()->getParent()->hasOptSize()) {
+  bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+  if (OptForSize) {
     UP.Threshold = UP.OptSizeThreshold;
     UP.PartialThreshold = UP.PartialOptSizeThreshold;
   }
@@ -963,7 +969,9 @@ bool llvm::computeUnrollCount(
 static LoopUnrollResult tryToUnrollLoop(
     Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     const TargetTransformInfo &TTI, AssumptionCache &AC,
-    OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
+    OptimizationRemarkEmitter &ORE,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+    bool PreserveLCSSA, int OptLevel,
     bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
     Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
     Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
@@ -989,7 +997,7 @@ static LoopUnrollResult tryToUnrollLoop(
   bool NotDuplicatable;
   bool Convergent;
   TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
-      L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+      L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
       ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
       ProvidedAllowPeeling);
   // Exit early if unrolling is disabled.
@@ -1176,7 +1184,8 @@ public:
     bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
 
     LoopUnrollResult Result = tryToUnrollLoop(
-        L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
+        L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
+        PreserveLCSSA, OptLevel, OnlyWhenForced,
         ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
         ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
 
@@ -1257,6 +1266,7 @@ PreservedAnalyses LoopFullUnrollPass::ru
 
   bool Changed =
       tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+                      /*BFI*/ nullptr, /*PSI*/ nullptr,
                       /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
                       /*ForgetAllSCEV*/ false, /*Count*/ None,
                       /*Threshold*/ None, /*AllowPartial*/ false,
@@ -1359,6 +1369,8 @@ PreservedAnalyses LoopUnrollPass::run(Fu
       AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
   ProfileSummaryInfo *PSI =
       MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
 
   bool Changed = false;
 
@@ -1394,7 +1406,7 @@ PreservedAnalyses LoopUnrollPass::run(Fu
     // The API here is quite complex to call and we allow to select some
     // flavors of unrolling during construction time (by setting UnrollOpts).
     LoopUnrollResult Result = tryToUnrollLoop(
-        &L, DT, &LI, SE, TTI, AC, ORE,
+        &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
         /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
         /*ForgetAllSCEV*/ false, /*Count*/ None,
         /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,

Modified: llvm/trunk/lib/Transforms/Utils/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CMakeLists.txt?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/CMakeLists.txt (original)
+++ llvm/trunk/lib/Transforms/Utils/CMakeLists.txt Mon Apr 15 09:49:00 2019
@@ -51,6 +51,7 @@ add_llvm_library(LLVMTransformUtils
   SimplifyCFG.cpp
   SimplifyIndVar.cpp
   SimplifyLibCalls.cpp
+  SizeOpts.cpp
   SplitModule.cpp
   StripNonLineTableDebugInfo.cpp
   SymbolRewriter.cpp

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp Mon Apr 15 09:49:00 2019
@@ -16,8 +16,10 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -34,6 +36,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -2375,7 +2378,9 @@ Value *LibCallSimplifier::optimizeFPuts(
 
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
-  if (CI->getFunction()->hasOptSize())
+  bool OptForSize = CI->getFunction()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+  if (OptForSize)
     return nullptr;
 
   // Check if has any use
@@ -2750,9 +2755,10 @@ Value *LibCallSimplifier::optimizeCall(C
 LibCallSimplifier::LibCallSimplifier(
     const DataLayout &DL, const TargetLibraryInfo *TLI,
     OptimizationRemarkEmitter &ORE,
+    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
     function_ref<void(Instruction *, Value *)> Replacer,
     function_ref<void(Instruction *)> Eraser)
-    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+    : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
       UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
 
 void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {

Added: llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp?rev=358422&view=auto
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp (added)
+++ llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp Mon Apr 15 09:49:00 2019
@@ -0,0 +1,37 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+using namespace llvm;
+
+static cl::opt<bool> ProfileGuidedSizeOpt(
+    "pgso", cl::Hidden, cl::init(true),
+    cl::desc("Enable the profile guided size optimization. "));
+
+bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI) {
+  assert(F);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+}
+
+bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+                                 BlockFrequencyInfo *BFI) {
+  assert(BB);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+}

Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Apr 15 09:49:00 2019
@@ -88,6 +88,7 @@
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -134,6 +135,7 @@
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include <algorithm>
 #include <cassert>
@@ -1452,12 +1454,13 @@ struct LoopVectorize : public FunctionPa
     auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
     auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
     auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
     std::function<const LoopAccessInfo &(Loop &)> GetLAA =
         [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
 
     return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
-                        GetLAA, *ORE);
+                        GetLAA, *ORE, PSI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1483,6 +1486,7 @@ struct LoopVectorize : public FunctionPa
 
     AU.addPreserved<BasicAAWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
   }
 };
 
@@ -6054,6 +6058,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapp
 INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
 INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
 
 namespace llvm {
@@ -7147,7 +7152,8 @@ static bool processLoopInVPlanNativePath
     Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
     LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
     TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
-    OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+    OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
 
   assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
   Function *F = L->getHeader()->getParent();
@@ -7162,10 +7168,12 @@ static bool processLoopInVPlanNativePath
   // Get user vectorization factor.
   const unsigned UserVF = Hints.getWidth();
 
-  // Check the function attributes to find out if this function should be
-  // optimized for size.
+  // Check the function attributes and profiles to find out if this function
+  // should be optimized for size.
   bool OptForSize =
-      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+      Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+      (F->hasOptSize() ||
+       llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
 
   // Plan how to best vectorize, return the best VF and its cost.
   const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
@@ -7245,10 +7253,12 @@ bool LoopVectorizePass::processLoop(Loop
     return false;
   }
 
-  // Check the function attributes to find out if this function should be
-  // optimized for size.
+  // Check the function attributes and profiles to find out if this function
+  // should be optimized for size.
   bool OptForSize =
-      Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+      Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+      (F->hasOptSize() ||
+       llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
 
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
@@ -7257,7 +7267,7 @@ bool LoopVectorizePass::processLoop(Loop
   // pipeline.
   if (!L->empty())
     return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
-                                        ORE, Hints);
+                                        ORE, BFI, PSI, Hints);
 
   assert(L->empty() && "Inner loop expected.");
   // Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7523,7 +7533,7 @@ bool LoopVectorizePass::runImpl(
     DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
     DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
     std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
-    OptimizationRemarkEmitter &ORE_) {
+    OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
   SE = &SE_;
   LI = &LI_;
   TTI = &TTI_;
@@ -7535,6 +7545,7 @@ bool LoopVectorizePass::runImpl(
   GetLAA = &GetLAA_;
   DB = &DB_;
   ORE = &ORE_;
+  PSI = PSI_;
 
   // Don't attempt if
   // 1. the target claims to have no vector registers, and
@@ -7603,8 +7614,12 @@ PreservedAnalyses LoopVectorizePass::run
       LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
       return LAM.getResult<LoopAccessAnalysis>(L, AR);
     };
+    const ModuleAnalysisManager &MAM =
+        AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+    ProfileSummaryInfo *PSI =
+        MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
     bool Changed =
-        runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+        runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
     if (!Changed)
       return PreservedAnalyses::all();
     PreservedAnalyses PA;

Modified: llvm/trunk/test/Other/new-pm-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-defaults.ll Mon Apr 15 09:49:00 2019
@@ -106,6 +106,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
@@ -245,7 +246,6 @@
 ; CHECK-O-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis

Modified: llvm/trunk/test/Other/new-pm-lto-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-lto-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-lto-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-lto-defaults.ll Mon Apr 15 09:49:00 2019
@@ -69,6 +69,7 @@
 ; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
 ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
 ; CHECK-O2-NEXT: Running pass: InstCombinePass
+; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
 ; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>

Modified: llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-thinlto-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-thinlto-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-thinlto-defaults.ll Mon Apr 15 09:49:00 2019
@@ -88,6 +88,7 @@
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Finished llvm::Function pass manager run.
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
@@ -219,7 +220,6 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis

Modified: llvm/trunk/test/Other/opt-O2-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O2-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O2-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O2-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -214,6 +214,8 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       Loop Access Analysis
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       Loop Load Elimination
 ; CHECK-NEXT:       Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:       Function Alias Analysis Results

Modified: llvm/trunk/test/Other/opt-O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O3-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O3-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O3-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -219,6 +219,8 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       Loop Access Analysis
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       Loop Load Elimination
 ; CHECK-NEXT:       Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:       Function Alias Analysis Results

Modified: llvm/trunk/test/Other/opt-Os-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-Os-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-Os-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-Os-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -201,6 +201,8 @@
 ; CHECK-NEXT:       Scalar Evolution Analysis
 ; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       Loop Access Analysis
+; CHECK-NEXT:       Lazy Branch Probability Analysis
+; CHECK-NEXT:       Lazy Block Frequency Analysis
 ; CHECK-NEXT:       Loop Load Elimination
 ; CHECK-NEXT:       Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:       Function Alias Analysis Results

Modified: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll (original)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll Mon Apr 15 09:49:00 2019
@@ -1,4 +1,6 @@
 ; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
 
 ; There are different candidates here for the base constant: 1073876992 and
 ; 1073876996. But we don't want to see the latter because it results in
@@ -8,6 +10,7 @@ define void @foo() #0 {
 entry:
 ; CHECK-LABEL: @foo
 ; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+; CHECK-LABEL: @foo_pgso
   %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
   %or = or i32 %0, 1
   store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
@@ -40,3 +43,59 @@ entry:
 }
 
 attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
+
+define void @foo_pgso() #1 !prof !14 {
+entry:
+; PGSO-LABEL: @foo_pgso
+; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+; NPGSO-LABEL: @foo_pgso
+; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+  %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %or = or i32 %0, 1
+  store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and = and i32 %1, -117506048
+  store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and1 = and i32 %2, -17367041
+  store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %and2 = and i32 %3, -262145
+  store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+  %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+  %and3 = and i32 %4, -8323073
+  store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+  store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+  %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %or4 = or i32 %5, 65536
+  store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+  %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or6.i.i = or i32 %6, 16
+  store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %and7.i.i = and i32 %7, -4
+  store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  %or8.i.i = or i32 %8, 2
+  store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+  ret void
+}
+
+attributes #1 = { norecurse nounwind readnone uwtable }  ; no optsize or minsize
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}

Modified: llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll Mon Apr 15 09:49:00 2019
@@ -2,6 +2,8 @@
 ; because it requires more arguments and thus extra MOVs are required.
 ;
 ; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
 
 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@@ -26,3 +28,34 @@ declare i32 @fputs(i8* nocapture readonl
 
 attributes #0 = { nounwind optsize }
 attributes #1 = { nounwind optsize  }
+
+define i32 @main_pgso() local_unnamed_addr !prof !14 {
+entry:
+; PGSO-LABEL: @main_pgso(
+; PGSO-NOT: call i64 @fwrite
+; PGSO: call i32 @fputs
+; NPGSO-LABEL: @main_pgso(
+; NPGSO: call i64 @fwrite
+; NPGSO-NOT: call i32 @fputs
+
+  %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+  %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+  ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}

Modified: llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll (original)
+++ llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll Mon Apr 15 09:49:00 2019
@@ -1,4 +1,6 @@
 ; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
 
 ; When optimizing for size don't eliminate in this loop because the loop would
 ; have to be versioned first because A and C may alias.
@@ -74,3 +76,54 @@ for.body:
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+
+; PGSO-LABEL: @f_pgso(
+; NPGSO-LABEL: @f_pgso(
+define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 {
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+  %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+  %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+  %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+  %b = load i32, i32* %Bidx, align 4
+  %a_p1 = add i32 %b, 2
+  store i32 %a_p1, i32* %Aidx_next, align 4
+
+  %a = load i32, i32* %Aidx, align 4
+; PGSO: %c = mul i32 %a, 2
+; NPGSO-NOT: %c = mul i32 %a, 2
+  %c = mul i32 %a, 2
+  store i32 %c, i32* %Cidx, align 4
+
+  %exitcond = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}

Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll Mon Apr 15 09:49:00 2019
@@ -1,5 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
 ; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
 
 
 ;///////////////////// TEST 1 //////////////////////////////
@@ -128,3 +130,47 @@ for.end:
 ; CHECK_NOCOUNT-LABEL: @Test4
 ; CHECK_NOCOUNT:      phi
 ; CHECK_NOCOUNT:      icmp
+
+;///////////////////// TEST 5 //////////////////////////////
+
+; This test shows that with PGO, this loop is cold and not unrolled.
+
+define i32 @Test5() !prof !14 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+  store i32 %i.05, i32* %arrayidx, align 4
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, 24
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 42
+}
+
+; PGSO-LABEL: @Test5
+; PGSO:      phi
+; PGSO:      icmp
+; NPGSO-LABEL: @Test5
+; NPGSO-NOT:      phi
+; NPGSO-NOT:      icmp
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}

Modified: llvm/trunk/test/Transforms/LoopVectorize/optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/optsize.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/optsize.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/optsize.ll Mon Apr 15 09:49:00 2019
@@ -2,6 +2,8 @@
 ; loop with the optimize for size or the minimize size attributes.
 ; REQUIRES: asserts
 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO
 
 target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
 
@@ -36,6 +38,7 @@ define i32 @foo_minsize() #1 {
 ; CHECK-LABEL: @foo_minsize(
 ; CHECK-NOT: <2 x i8>
 ; CHECK-NOT: <4 x i8>
+; CHECK-LABEL: @foo_pgso(
 
 entry:
   br label %for.body
@@ -57,3 +60,43 @@ for.end:
 
 attributes #1 = { minsize }
 
+define i32 @foo_pgso() !prof !14 {
+; PGSO-LABEL: @foo_pgso(
+; PGSO-NOT: <{{[0-9]+}} x i8>
+; NPGSO-LABEL: @foo_pgso(
+; NPGSO: <{{[0-9]+}} x i8>
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+  %0 = load i8, i8* %arrayidx, align 1
+  %cmp1 = icmp eq i8 %0, 0
+  %. = select i1 %cmp1, i8 2, i8 1
+  store i8 %., i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.08, 1
+  %exitcond = icmp eq i32 %i.08, 202
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}




More information about the llvm-commits mailing list