[llvm] r358422 - [PGO] Profile guided code size optimization.
Hiroshi Yamauchi via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 15 09:49:01 PDT 2019
Author: yamauchi
Date: Mon Apr 15 09:49:00 2019
New Revision: 358422
URL: http://llvm.org/viewvc/llvm-project?rev=358422&view=rev
Log:
[PGO] Profile guided code size optimization.
Summary:
Enable some of the existing size optimizations for cold code under PGO.
A ~5% code size saving in big internal app under PGO.
The way it gets BFI/PSI is discussed in the RFC thread
http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html
Note it doesn't currently touch loop passes.
Reviewers: davidxl, eraman
Reviewed By: eraman
Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59514
Added:
llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h
llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp
Modified:
llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h
llvm/trunk/lib/Passes/PassBuilder.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp
llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/lib/Transforms/Utils/CMakeLists.txt
llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Other/new-pm-defaults.ll
llvm/trunk/test/Other/new-pm-lto-defaults.ll
llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
llvm/trunk/test/Other/opt-O2-pipeline.ll
llvm/trunk/test/Other/opt-O3-pipeline.ll
llvm/trunk/test/Other/opt-Os-pipeline.ll
llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll
llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll
llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
llvm/trunk/test/Transforms/LoopVectorize/optsize.ll
Modified: llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h (original)
+++ llvm/trunk/include/llvm/Transforms/Scalar/ConstantHoisting.h Mon Apr 15 09:49:00 2019
@@ -55,6 +55,7 @@ class DominatorTree;
class Function;
class GlobalVariable;
class Instruction;
+class ProfileSummaryInfo;
class TargetTransformInfo;
/// A private "module" namespace for types and utilities used by
@@ -124,7 +125,8 @@ public:
// Glue for old PM.
bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- BlockFrequencyInfo *BFI, BasicBlock &Entry);
+ BlockFrequencyInfo *BFI, BasicBlock &Entry,
+ ProfileSummaryInfo *PSI);
void cleanup() {
ClonedCastMap.clear();
@@ -148,6 +150,7 @@ private:
LLVMContext *Ctx;
const DataLayout *DL;
BasicBlock *Entry;
+ ProfileSummaryInfo *PSI;
/// Keeps track of constant candidates found in the function.
using ConstCandVecType = std::vector<consthoist::ConstantCandidate>;
Modified: llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/SimplifyLibCalls.h Mon Apr 15 09:49:00 2019
@@ -28,6 +28,8 @@ class TargetLibraryInfo;
class BasicBlock;
class Function;
class OptimizationRemarkEmitter;
+class BlockFrequencyInfo;
+class ProfileSummaryInfo;
/// This class implements simplifications for calls to fortified library
/// functions (__st*cpy_chk, __memcpy_chk, __memmove_chk, __memset_chk), to,
@@ -74,6 +76,8 @@ private:
const DataLayout &DL;
const TargetLibraryInfo *TLI;
OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
bool UnsafeFPShrink;
function_ref<void(Instruction *, Value *)> Replacer;
function_ref<void(Instruction *)> Eraser;
@@ -101,6 +105,7 @@ public:
LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer =
&replaceAllUsesWithDefault,
function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
Added: llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h?rev=358422&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h (added)
+++ llvm/trunk/include/llvm/Transforms/Utils/SizeOpts.h Mon Apr 15 09:49:00 2019
@@ -0,0 +1,34 @@
+//===- llvm/Transforms/Utils/SizeOpts.h - size optimization -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
+#define LLVM_TRANSFORMS_UTILS_SiZEOPTS_H
+
+namespace llvm {
+
+class BasicBlock;
+class BlockFrequencyInfo;
+class Function;
+class ProfileSummaryInfo;
+
+/// Returns true if function \p F is suggested to be size-optimized base on the
+/// profile.
+bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
+/// Returns true if basic block \p BB is suggested to be size-optimized base
+/// on the profile.
+bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_SiZEOPTS_H
Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Mon Apr 15 09:49:00 2019
@@ -24,11 +24,13 @@ namespace llvm {
class AssumptionCache;
class BasicBlock;
+class BlockFrequencyInfo;
class DependenceInfo;
class DominatorTree;
class Loop;
class LoopInfo;
class MDNode;
+class ProfileSummaryInfo;
class OptimizationRemarkEmitter;
class ScalarEvolution;
@@ -120,7 +122,8 @@ void simplifyLoopAfterUnroll(Loop *L, bo
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling);
Modified: llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h (original)
+++ llvm/trunk/include/llvm/Transforms/Vectorize/LoopVectorize.h Mon Apr 15 09:49:00 2019
@@ -71,6 +71,7 @@ class Loop;
class LoopAccessInfo;
class LoopInfo;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class ScalarEvolution;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -96,6 +97,7 @@ struct LoopVectorizePass : public PassIn
AssumptionCache *AC;
std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -105,7 +107,7 @@ struct LoopVectorizePass : public PassIn
BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE);
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_);
bool processLoop(Loop *L);
};
Modified: llvm/trunk/lib/Passes/PassBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Passes/PassBuilder.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp (original)
+++ llvm/trunk/lib/Passes/PassBuilder.cpp Mon Apr 15 09:49:00 2019
@@ -575,8 +575,12 @@ void PassBuilder::addPGOInstrPasses(Modu
Options.DoCounterPromotion = true;
Options.UseBFIInPromotion = IsCS;
MPM.addPass(InstrProfiling(Options, IsCS));
- } else if (!ProfileFile.empty())
+ } else if (!ProfileFile.empty()) {
MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ }
}
static InlineParams
@@ -649,6 +653,9 @@ PassBuilder::buildModuleSimplificationPi
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
Phase == ThinLTOPhase::PreLink));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the ThinLTO backend.
if (Phase != ThinLTOPhase::PreLink)
@@ -1065,6 +1072,9 @@ PassBuilder::buildLTODefaultPipeline(Opt
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
false /* ThinLTOPhase::PreLink */));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
}
// Remove unused virtual tables to improve the quality of code generated by
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Apr 15 09:49:00 2019
@@ -4178,7 +4178,7 @@ Instruction *InstCombiner::tryOptimizeCa
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
+ LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h Mon Apr 15 09:49:00 2019
@@ -52,12 +52,14 @@ namespace llvm {
class APInt;
class AssumptionCache;
+class BlockFrequencyInfo;
class DataLayout;
class DominatorTree;
class GEPOperator;
class GlobalVariable;
class LoopInfo;
class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
class TargetLibraryInfo;
class User;
@@ -304,6 +306,8 @@ private:
const DataLayout &DL;
const SimplifyQuery SQ;
OptimizationRemarkEmitter &ORE;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
@@ -315,11 +319,11 @@ public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, const DataLayout &DL,
- LoopInfo *LI)
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
- DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {}
+ DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {}
/// Run the combiner over the entire worklist until it is empty.
///
Modified: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Mon Apr 15 09:49:00 2019
@@ -46,14 +46,17 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3478,7 +3481,8 @@ static bool prepareICWorklistFromFunctio
static bool combineInstructionsOverFunction(
Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
- OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
LoopInfo *LI = nullptr) {
auto &DL = F.getParent()->getDataLayout();
ExpensiveCombines |= EnableExpensiveCombines;
@@ -3509,7 +3513,7 @@ static bool combineInstructionsOverFunct
MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
- AC, TLI, DT, ORE, DL, LI);
+ AC, TLI, DT, ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
if (!IC.run())
@@ -3529,8 +3533,15 @@ PreservedAnalyses InstCombinePass::run(F
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI))
+ BFI, PSI, ExpensiveCombines, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3554,6 +3565,8 @@ void InstructionCombiningPass::getAnalys
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3570,9 +3583,15 @@ bool InstructionCombiningPass::runOnFunc
// Optional analyses.
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ BlockFrequencyInfo *BFI =
+ (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
- ExpensiveCombines, LI);
+ BFI, PSI, ExpensiveCombines, LI);
}
char InstructionCombiningPass::ID = 0;
@@ -3585,6 +3604,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTree
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
"Combine redundant instructions", false, false)
Modified: llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ConstantHoisting.cpp Mon Apr 15 09:49:00 2019
@@ -41,6 +41,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
@@ -60,6 +61,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -111,6 +113,7 @@ public:
if (ConstHoistWithBlockFrequency)
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -126,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLe
"Constant Hoisting", false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
@@ -148,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFu
ConstHoistWithBlockFrequency
? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
: nullptr,
- Fn.getEntryBlock());
+ Fn.getEntryBlock(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
if (MadeChange) {
LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsI
ConstCandVecType::iterator &MaxCostItr) {
unsigned NumUses = 0;
- if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) {
+ bool OptForSize = Entry->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI);
+ if (!OptForSize || std::distance(S,E) > 100) {
for (auto ConstCand = S; ConstCand != E; ++ConstCand) {
NumUses += ConstCand->Uses.size();
if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost)
@@ -919,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCas
/// Optimize expensive integer constants in the given function.
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
DominatorTree &DT, BlockFrequencyInfo *BFI,
- BasicBlock &Entry) {
+ BasicBlock &Entry, ProfileSummaryInfo *PSI) {
this->TTI = &TTI;
this->DT = &DT;
this->BFI = BFI;
this->DL = &Fn.getParent()->getDataLayout();
this->Ctx = &Fn.getContext();
this->Entry = &Entry;
+ this->PSI = PSI;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -962,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::
auto BFI = ConstHoistWithBlockFrequency
? &AM.getResult<BlockFrequencyAnalysis>(F)
: nullptr;
- if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
Modified: llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopLoadElimination.cpp Mon Apr 15 09:49:00 2019
@@ -29,11 +29,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -54,6 +57,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
@@ -159,8 +163,9 @@ namespace {
class LoadEliminationForLoop {
public:
LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI,
- DominatorTree *DT)
- : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {}
+ DominatorTree *DT, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo* PSI)
+ : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {}
/// Look through the loop-carried and loop-independent dependences in
/// this loop and find store->load dependences.
@@ -529,7 +534,11 @@ public:
}
if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
- if (L->getHeader()->getParent()->hasOptSize()) {
+ auto *HeaderBB = L->getHeader();
+ auto *F = HeaderBB->getParent();
+ bool OptForSize = F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI);
+ if (OptForSize) {
LLVM_DEBUG(
dbgs() << "Versioning is needed but not allowed when optimizing "
"for size.\n");
@@ -572,6 +581,8 @@ private:
LoopInfo *LI;
const LoopAccessInfo &LAI;
DominatorTree *DT;
+ BlockFrequencyInfo *BFI;
+ ProfileSummaryInfo *PSI;
PredicatedScalarEvolution PSE;
};
@@ -579,6 +590,7 @@ private:
static bool
eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
// Build up a worklist of inner-loops to transform to avoid iterator
// invalidation.
@@ -597,7 +609,7 @@ eliminateLoadsAcrossLoops(Function &F, L
bool Changed = false;
for (Loop *L : Worklist) {
// The actual work is performed by LoadEliminationForLoop.
- LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+ LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
Changed |= LEL.processLoop();
}
return Changed;
@@ -622,10 +634,14 @@ public:
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
// Process each loop nest in the function.
return eliminateLoadsAcrossLoops(
- F, LI, DT,
+ F, LI, DT, BFI, PSI,
[&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
@@ -638,6 +654,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
};
@@ -653,6 +671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLeg
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
FunctionPass *llvm::createLoopLoadEliminationPass() {
@@ -668,13 +688,17 @@ PreservedAnalyses LoopLoadEliminationPas
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
MemorySSA *MSSA = EnableMSSALoopDependency
? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
: nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
- F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
+ F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp Mon Apr 15 09:49:00 2019
@@ -294,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, Dominator
return LoopUnrollResult::Unmodified;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, None, None, None, None, None, None);
+ L, SE, TTI, nullptr, nullptr, OptLevel,
+ None, None, None, None, None, None);
if (AllowUnrollAndJam.getNumOccurrences() > 0)
UP.UnrollAndJam = AllowUnrollAndJam;
if (UnrollAndJamThreshold.getNumOccurrences() > 0)
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Mon Apr 15 09:49:00 2019
@@ -23,7 +23,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -55,6 +57,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
#include <cassert>
@@ -165,7 +168,8 @@ static const unsigned NoThreshold = std:
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel,
+ Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) {
@@ -198,7 +202,9 @@ TargetTransformInfo::UnrollingPreference
TTI.getUnrollingPreferences(L, SE, UP);
// Apply size attributes
- if (L->getHeader()->getParent()->hasOptSize()) {
+ bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI);
+ if (OptForSize) {
UP.Threshold = UP.OptSizeThreshold;
UP.PartialThreshold = UP.PartialOptSizeThreshold;
}
@@ -963,7 +969,9 @@ bool llvm::computeUnrollCount(
static LoopUnrollResult tryToUnrollLoop(
Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
- OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel,
+ OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ bool PreserveLCSSA, int OptLevel,
bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial,
Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound,
@@ -989,7 +997,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling);
// Exit early if unrolling is disabled.
@@ -1176,7 +1184,8 @@ public:
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
LoopUnrollResult Result = tryToUnrollLoop(
- L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced,
+ L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr,
+ PreserveLCSSA, OptLevel, OnlyWhenForced,
ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling);
@@ -1257,6 +1266,7 @@ PreservedAnalyses LoopFullUnrollPass::ru
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE,
+ /*BFI*/ nullptr, /*PSI*/ nullptr,
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
/*ForgetAllSCEV*/ false, /*Count*/ None,
/*Threshold*/ None, /*AllowPartial*/ false,
@@ -1359,6 +1369,8 @@ PreservedAnalyses LoopUnrollPass::run(Fu
AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
ProfileSummaryInfo *PSI =
MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
bool Changed = false;
@@ -1394,7 +1406,7 @@ PreservedAnalyses LoopUnrollPass::run(Fu
// The API here is quite complex to call and we allow to select some
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
- &L, DT, &LI, SE, TTI, AC, ORE,
+ &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
/*ForgetAllSCEV*/ false, /*Count*/ None,
/*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
Modified: llvm/trunk/lib/Transforms/Utils/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/CMakeLists.txt?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/CMakeLists.txt (original)
+++ llvm/trunk/lib/Transforms/Utils/CMakeLists.txt Mon Apr 15 09:49:00 2019
@@ -51,6 +51,7 @@ add_llvm_library(LLVMTransformUtils
SimplifyCFG.cpp
SimplifyIndVar.cpp
SimplifyLibCalls.cpp
+ SizeOpts.cpp
SplitModule.cpp
StripNonLineTableDebugInfo.cpp
SymbolRewriter.cpp
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyLibCalls.cpp Mon Apr 15 09:49:00 2019
@@ -16,8 +16,10 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -34,6 +36,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
using namespace PatternMatch;
@@ -2375,7 +2378,9 @@ Value *LibCallSimplifier::optimizeFPuts(
// Don't rewrite fputs to fwrite when optimising for size because fwrite
// requires more arguments and thus extra MOVs are required.
- if (CI->getFunction()->hasOptSize())
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (OptForSize)
return nullptr;
// Check if has any use
@@ -2750,9 +2755,10 @@ Value *LibCallSimplifier::optimizeCall(C
LibCallSimplifier::LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
OptimizationRemarkEmitter &ORE,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer,
function_ref<void(Instruction *)> Eraser)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE),
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
Added: llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp?rev=358422&view=auto
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp (added)
+++ llvm/trunk/lib/Transforms/Utils/SizeOpts.cpp Mon Apr 15 09:49:00 2019
@@ -0,0 +1,37 @@
+//===-- SizeOpts.cpp - code size optimization related code ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared code size optimization related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
+using namespace llvm;
+
+static cl::opt<bool> ProfileGuidedSizeOpt(
+ "pgso", cl::Hidden, cl::init(true),
+ cl::desc("Enable the profile guided size optimization. "));
+
+bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(F);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+}
+
+bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
+ assert(BB);
+ if (!PSI || !BFI || !PSI->hasProfileSummary())
+ return false;
+ return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+}
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Apr 15 09:49:00 2019
@@ -88,6 +88,7 @@
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -134,6 +135,7 @@
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
@@ -1452,12 +1454,13 @@ struct LoopVectorize : public FunctionPa
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
- GetLAA, *ORE);
+ GetLAA, *ORE, PSI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1483,6 +1486,7 @@ struct LoopVectorize : public FunctionPa
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
};
@@ -6054,6 +6058,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapp
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -7147,7 +7152,8 @@ static bool processLoopInVPlanNativePath
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+ OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
@@ -7162,10 +7168,12 @@ static bool processLoopInVPlanNativePath
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Plan how to best vectorize, return the best VF and its cost.
const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
@@ -7245,10 +7253,12 @@ bool LoopVectorizePass::processLoop(Loop
return false;
}
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7257,7 +7267,7 @@ bool LoopVectorizePass::processLoop(Loop
// pipeline.
if (!L->empty())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
- ORE, Hints);
+ ORE, BFI, PSI, Hints);
assert(L->empty() && "Inner loop expected.");
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7523,7 +7533,7 @@ bool LoopVectorizePass::runImpl(
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE_) {
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
SE = &SE_;
LI = &LI_;
TTI = &TTI_;
@@ -7535,6 +7545,7 @@ bool LoopVectorizePass::runImpl(
GetLAA = &GetLAA_;
DB = &DB_;
ORE = &ORE_;
+ PSI = PSI_;
// Don't attempt if
// 1. the target claims to have no vector registers, and
@@ -7603,8 +7614,12 @@ PreservedAnalyses LoopVectorizePass::run
LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
bool Changed =
- runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+ runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
Modified: llvm/trunk/test/Other/new-pm-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-defaults.ll Mon Apr 15 09:49:00 2019
@@ -106,6 +106,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
@@ -245,7 +246,6 @@
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
Modified: llvm/trunk/test/Other/new-pm-lto-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-lto-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-lto-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-lto-defaults.ll Mon Apr 15 09:49:00 2019
@@ -69,6 +69,7 @@
; CHECK-O2-NEXT: Starting llvm::Function pass manager run.
; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass
; CHECK-O2-NEXT: Running pass: InstCombinePass
+; CHECK-O2-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass>
Modified: llvm/trunk/test/Other/new-pm-thinlto-defaults.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/new-pm-thinlto-defaults.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/new-pm-thinlto-defaults.ll (original)
+++ llvm/trunk/test/Other/new-pm-thinlto-defaults.ll Mon Apr 15 09:49:00 2019
@@ -88,6 +88,7 @@
; CHECK-O-NEXT: Running pass: InstCombinePass
; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
; CHECK-O-NEXT: Finished llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA
@@ -219,7 +220,6 @@
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
-; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis
Modified: llvm/trunk/test/Other/opt-O2-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O2-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O2-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O2-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -214,6 +214,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
Modified: llvm/trunk/test/Other/opt-O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-O3-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-O3-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-O3-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -219,6 +219,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
Modified: llvm/trunk/test/Other/opt-Os-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/opt-Os-pipeline.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Other/opt-Os-pipeline.ll (original)
+++ llvm/trunk/test/Other/opt-Os-pipeline.ll Mon Apr 15 09:49:00 2019
@@ -201,6 +201,8 @@
; CHECK-NEXT: Scalar Evolution Analysis
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Loop Access Analysis
+; CHECK-NEXT: Lazy Branch Probability Analysis
+; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Loop Load Elimination
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
; CHECK-NEXT: Function Alias Analysis Results
Modified: llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll (original)
+++ llvm/trunk/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll Mon Apr 15 09:49:00 2019
@@ -1,4 +1,6 @@
; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
; There are different candidates here for the base constant: 1073876992 and
; 1073876996. But we don't want to see the latter because it results in
@@ -8,6 +10,7 @@ define void @foo() #0 {
entry:
; CHECK-LABEL: @foo
; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+; CHECK-LABEL: @foo_pgso
%0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
%or = or i32 %0, 1
store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
@@ -40,3 +43,59 @@ entry:
}
attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
+
+define void @foo_pgso() #1 !prof !14 {
+entry:
+; PGSO-LABEL: @foo_pgso
+; PGSO-NOT: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+; NPGSO-LABEL: @foo_pgso
+; NPGSO: [[CONST2:%const_mat[0-9]*]] = add i32 %const, -4
+ %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %or = or i32 %0, 1
+ store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and = and i32 %1, -117506048
+ store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and1 = and i32 %2, -17367041
+ store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and2 = and i32 %3, -262145
+ store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and3 = and i32 %4, -8323073
+ store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+ store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+ %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %or4 = or i32 %5, 65536
+ store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or6.i.i = or i32 %6, 16
+ store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %and7.i.i = and i32 %7, -4
+ store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or8.i.i = or i32 %8, 2
+ store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ ret void
+}
+
+attributes #1 = { norecurse nounwind readnone uwtable } ; no optsize or minsize
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
Modified: llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fputs-opt-size.ll Mon Apr 15 09:49:00 2019
@@ -2,6 +2,8 @@
; because it requires more arguments and thus extra MOVs are required.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -instcombine -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -instcombine -pgso=false -S | FileCheck %s -check-prefix=NPGSO
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
@@ -26,3 +28,34 @@ declare i32 @fputs(i8* nocapture readonl
attributes #0 = { nounwind optsize }
attributes #1 = { nounwind optsize }
+
+define i32 @main_pgso() local_unnamed_addr !prof !14 {
+entry:
+; PGSO-LABEL: @main_pgso(
+; PGSO-NOT: call i64 @fwrite
+; PGSO: call i32 @fputs
+; NPGSO-LABEL: @main_pgso(
+; NPGSO: call i64 @fwrite
+; NPGSO-NOT: call i32 @fputs
+
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+ %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
Modified: llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll (original)
+++ llvm/trunk/test/Transforms/LoopLoadElim/opt-size.ll Mon Apr 15 09:49:00 2019
@@ -1,4 +1,6 @@
; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -basicaa -loop-load-elim -pgso -S < %s | FileCheck %s -check-prefix=PGSO
+; RUN: opt -basicaa -loop-load-elim -pgso=false -S < %s | FileCheck %s -check-prefix=NPGSO
; When optimizing for size don't eliminate in this loop because the loop would
; have to be versioned first because A and C may alias.
@@ -74,3 +76,54 @@ for.body:
for.end: ; preds = %for.body
ret void
}
+
+
+; PGSO-LABEL: @f_pgso(
+; NPGSO-LABEL: @f_pgso(
+define void @f_pgso(i32* %A, i32* %B, i32* %C, i64 %N) !prof !14 {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; PGSO: %c = mul i32 %a, 2
+; NPGSO-NOT: %c = mul i32 %a, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-opt-attribute.ll Mon Apr 15 09:49:00 2019
@@ -1,5 +1,7 @@
; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso | FileCheck -check-prefix=PGSO %s
+; RUN: opt < %s -S -passes='require<profile-summary>,function(unroll)' -pgso=false | FileCheck -check-prefix=NPGSO %s
;///////////////////// TEST 1 //////////////////////////////
@@ -128,3 +130,47 @@ for.end:
; CHECK_NOCOUNT-LABEL: @Test4
; CHECK_NOCOUNT: phi
; CHECK_NOCOUNT: icmp
+
+;///////////////////// TEST 5 //////////////////////////////
+
+; This test shows that with PGO, this loop is cold and not unrolled.
+
+define i32 @Test5() !prof !14 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+ store i32 %i.05, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 24
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 42
+}
+
+; PGSO-LABEL: @Test5
+; PGSO: phi
+; PGSO: icmp
+; NPGSO-LABEL: @Test5
+; NPGSO-NOT: phi
+; NPGSO-NOT: icmp
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
Modified: llvm/trunk/test/Transforms/LoopVectorize/optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/optsize.ll?rev=358422&r1=358421&r2=358422&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/optsize.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/optsize.ll Mon Apr 15 09:49:00 2019
@@ -2,6 +2,8 @@
; loop with the optimize for size or the minimize size attributes.
; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -pgso -S | FileCheck %s -check-prefix=PGSO
+; RUN: opt < %s -loop-vectorize -pgso=false -S | FileCheck %s -check-prefix=NPGSO
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
@@ -36,6 +38,7 @@ define i32 @foo_minsize() #1 {
; CHECK-LABEL: @foo_minsize(
; CHECK-NOT: <2 x i8>
; CHECK-NOT: <4 x i8>
+; CHECK-LABEL: @foo_pgso(
entry:
br label %for.body
@@ -57,3 +60,43 @@ for.end:
attributes #1 = { minsize }
+define i32 @foo_pgso() !prof !14 {
+; PGSO-LABEL: @foo_pgso(
+; PGSO-NOT: <{{[0-9]+}} x i8>
+; NPGSO-LABEL: @foo_pgso(
+; NPGSO: <{{[0-9]+}} x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, 202
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+!14 = !{!"function_entry_count", i64 0}
More information about the llvm-commits
mailing list