[llvm] [CodeGen] Port `SelectOptimize` to new pass manager (PR #74920)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 10 23:39:37 PST 2023
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/74920
>From 67d51a6a3f37a48bc16376f5d0729d324616a628 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Sat, 9 Dec 2023 15:46:02 +0800
Subject: [PATCH] [CodeGen] Port `SelectOptimize` to new pass manager
---
.../include/llvm/CodeGen/CodeGenPassBuilder.h | 11 +-
.../llvm/CodeGen/MachinePassRegistry.def | 2 +-
llvm/include/llvm/CodeGen/SelectOptimize.h | 34 ++++
llvm/lib/CodeGen/SelectOptimize.cpp | 172 ++++++++++++------
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 1 +
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 3 +
...ert-highly-predictable-select-to-branch.ll | 4 +
.../test/CodeGen/AArch64/selectopt-logical.ll | 1 +
llvm/test/CodeGen/AArch64/selectopt.ll | 7 +
llvm/test/CodeGen/X86/select-optimize.ll | 1 +
11 files changed, 174 insertions(+), 63 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/SelectOptimize.h
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index bb139ef2eb3510..1c0e4fb1fae376 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -30,6 +31,7 @@
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/CodeGen/ReplaceWithVeclib.h"
#include "llvm/CodeGen/SafeStack.h"
+#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/CodeGen/UnreachableBlockElim.h"
#include "llvm/CodeGen/WasmEHPrepare.h"
#include "llvm/CodeGen/WinEHPrepare.h"
@@ -655,8 +657,13 @@ void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const {
addPass(ExpandReductionsPass());
// Convert conditional moves to conditional jumps when profitable.
- if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize)
- addPass(SelectOptimizePass());
+ if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize) {
+ // FIXME: this will trigger assertion!
+ // `ProfileSummaryAnalysis` is a module pass. `AddIRPass`
+ // doesn't support adding module passes between function passes.
+ addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ addPass(SelectOptimizePass(&TM));
+ }
}
/// Turn exception handling constructs into something the code generators can
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index e6e979a4582c7a..27845b4cdd3747 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -53,6 +53,7 @@ FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ())
FUNCTION_PASS("safe-stack", SafeStackPass, (TM))
FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ())
+FUNCTION_PASS("select-optimize", SelectOptimizePass, (TM))
FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ())
FUNCTION_PASS("verify", VerifierPass, ())
@@ -128,7 +129,6 @@ DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ())
DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ())
DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ())
-DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ())
DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ())
DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ())
DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ())
diff --git a/llvm/include/llvm/CodeGen/SelectOptimize.h b/llvm/include/llvm/CodeGen/SelectOptimize.h
new file mode 100644
index 00000000000000..37024a154145a3
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/SelectOptimize.h
@@ -0,0 +1,34 @@
+//===--- llvm/CodeGen/SelectOptimize.h ---------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the SelectOptimizePass class,
+/// its corresponding pass name is `select-optimize`.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SELECTOPTIMIZE_H
+#define LLVM_CODEGEN_SELECTOPTIMIZE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class TargetMachine;
+
+class SelectOptimizePass : public PassInfoMixin<SelectOptimizePass> {
+ const TargetMachine *TM;
+
+public:
+ explicit SelectOptimizePass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_SELECTOPTIMIZE_H
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 05413fb5d75826..2ad90d12ea6109 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -96,36 +97,22 @@ static cl::opt<bool>
namespace {
-class SelectOptimize : public FunctionPass {
+class SelectOptimizeImpl {
const TargetMachine *TM = nullptr;
const TargetSubtargetInfo *TSI = nullptr;
const TargetLowering *TLI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const LoopInfo *LI = nullptr;
- DominatorTree *DT = nullptr;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
+ BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
TargetSchedModel TSchedModel;
public:
- static char ID;
-
- SelectOptimize() : FunctionPass(ID) {
- initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetPassConfig>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
+ SelectOptimizeImpl() = default;
+ SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ bool runOnFunction(Function &F, Pass &P);
private:
// Select groups consist of consecutive select instructions with the same
@@ -211,25 +198,90 @@ class SelectOptimize : public FunctionPass {
// Returns true if the target architecture supports lowering a given select.
bool isSelectKindSupported(SelectInst *SI);
};
+
+class SelectOptimize : public FunctionPass {
+ SelectOptimizeImpl Impl;
+
+public:
+ static char ID;
+
+ SelectOptimize() : FunctionPass(ID) {
+ initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ return Impl.runOnFunction(F, *this);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ }
+};
+
} // namespace
+PreservedAnalyses SelectOptimizePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ SelectOptimizeImpl Impl(TM);
+ return Impl.run(F, FAM);
+}
+
char SelectOptimize::ID = 0;
INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
false)
FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
-bool SelectOptimize::runOnFunction(Function &F) {
- TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+PreservedAnalyses SelectOptimizeImpl::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ TSI = TM->getSubtargetImpl(F);
+ TLI = TSI->getTargetLowering();
+
+ // If none of the select types is supported then skip this pass.
+ // This is an optimization pass. Legality issues will be handled by
+ // instruction selection.
+ if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
+ !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
+ !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
+ return PreservedAnalyses::all();
+
+ TTI = &FAM.getResult<TargetIRAnalysis>(F);
+ if (!TTI->enableSelectOptimize())
+ return PreservedAnalyses::all();
+
+ PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
+ .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ assert(PSI && "This pass requires module analysis pass `profile-summary`!");
+ BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ // When optimizing for size, selects are preferable over branches.
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+ return PreservedAnalyses::all();
+
+ LI = &FAM.getResult<LoopAnalysis>(F);
+ ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ TSchedModel.init(TSI);
+
+ bool Changed = optimizeSelects(F);
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
+ TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
TSI = TM->getSubtargetImpl(F);
TLI = TSI->getTargetLowering();
@@ -241,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) {
!TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
return false;
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
if (!TTI->enableSelectOptimize())
return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- BPI.reset(new BranchProbabilityInfo(F, *LI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
TSchedModel.init(TSI);
// When optimizing for size, selects are preferable over branches.
- if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get()))
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
return false;
return optimizeSelects(F);
}
-bool SelectOptimize::optimizeSelects(Function &F) {
+bool SelectOptimizeImpl::optimizeSelects(Function &F) {
// Determine for which select groups it is profitable converting to branches.
SelectGroups ProfSIGroups;
// Base heuristics apply only to non-loops and outer loops.
@@ -277,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) {
return !ProfSIGroups.empty();
}
-void SelectOptimize::optimizeSelectsBase(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
+ SelectGroups &ProfSIGroups) {
// Collect all the select groups.
SelectGroups SIGroups;
for (BasicBlock &BB : F) {
@@ -293,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F,
findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
}
-void SelectOptimize::optimizeSelectsInnerLoops(Function &F,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
+ SelectGroups &ProfSIGroups) {
SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
// Need to check size on each iteration as we accumulate child loops.
for (unsigned long i = 0; i < Loops.size(); ++i)
@@ -331,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue,
return V;
}
-void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : ProfSIGroups) {
// The code transformation here is a modified version of the sinking
// transformation in CodeGenPrepare::optimizeSelectInst with a more
@@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) {
return false;
}
-void SelectOptimize::collectSelectGroups(BasicBlock &BB,
- SelectGroups &SIGroups) {
+void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
+ SelectGroups &SIGroups) {
BasicBlock::iterator BBIt = BB.begin();
while (BBIt != BB.end()) {
Instruction *I = &*BBIt++;
@@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB,
}
}
-void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
- SelectGroups &ProfSIGroups) {
+void SelectOptimizeImpl::findProfitableSIGroupsBase(
+ SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
for (SelectGroup &ASI : SIGroups) {
++NumSelectOptAnalyzed;
if (isConvertToBranchProfitableBase(ASI))
@@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
ORE->emit(Rem);
}
-void SelectOptimize::findProfitableSIGroupsInnerLoops(
+void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
NumSelectOptAnalyzed += SIGroups.size();
// For each select group in an inner-most loop,
// a branch is more preferable than a select/conditional-move if:
// i) conversion to branches for all the select groups of the loop satisfies
// loop-level heuristics including reducing the loop's critical path by
- // some threshold (see SelectOptimize::checkLoopHeuristics); and
+ // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
// ii) the total cost of the select group is cheaper with a branch compared
// to its predicated version. The cost is in terms of latency and the cost
// of a select group is the cost of its most expensive select instruction
@@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
}
}
-bool SelectOptimize::isConvertToBranchProfitableBase(
+bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
const SmallVector<SelectInst *, 2> &ASI) {
SelectInst *SI = ASI.front();
LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
@@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
// Skip cold basic blocks. Better to optimize for size for cold blocks.
- if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
+ if (PSI->isColdBlock(SI->getParent(), BFI)) {
++NumSelectColdBB;
ORmiss << "Not converted to branch because of cold basic block. ";
EmitAndPrintRemark(ORE, ORmiss);
@@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator,
return (Numerator + (Denominator / 2)) / Denominator;
}
-bool SelectOptimize::hasExpensiveColdOperand(
+bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SmallVector<SelectInst *, 2> &ASI) {
bool ColdOperand = false;
uint64_t TrueWeight, FalseWeight, TotalWeight;
@@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
// (sufficiently-accurate in practice), we populate this set with the
// instructions of the backwards dependence slice that only have one-use and
// form an one-use chain that leads to the source instruction.
-void SelectOptimize::getExclBackwardsSlice(Instruction *I,
- std::stack<Instruction *> &Slice,
- Instruction *SI, bool ForSinking) {
+void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
+ std::stack<Instruction *> &Slice,
+ Instruction *SI,
+ bool ForSinking) {
SmallPtrSet<Instruction *, 2> Visited;
std::queue<Instruction *> Worklist;
Worklist.push(I);
@@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
}
}
-bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) {
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
@@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
return false;
}
-bool SelectOptimize::checkLoopHeuristics(const Loop *L,
- const CostInfo LoopCost[2]) {
+bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
+ const CostInfo LoopCost[2]) {
// Loop-level checks to determine if a non-predicated version (with branches)
// of the loop is more profitable than its predicated version.
@@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
// and non-predicated version of the given loop.
// Returns false if unable to compute these costs due to invalid cost of loop
// instruction(s).
-bool SelectOptimize::computeLoopCosts(
+bool SelectOptimizeImpl::computeLoopCosts(
const Loop *L, const SelectGroups &SIGroups,
DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
@@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts(
}
SmallPtrSet<const Instruction *, 2>
-SelectOptimize::getSIset(const SelectGroups &SIGroups) {
+SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) {
SmallPtrSet<const Instruction *, 2> SIset;
for (const SelectGroup &ASI : SIGroups)
for (const SelectInst *SI : ASI)
@@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
return SIset;
}
-std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t>
+SelectOptimizeImpl::computeInstCost(const Instruction *I) {
InstructionCost ICost =
TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
if (auto OC = ICost.getValue())
@@ -986,8 +1038,8 @@ std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
}
ScaledNumber<uint64_t>
-SelectOptimize::getMispredictionCost(const SelectInst *SI,
- const Scaled64 CondCost) {
+SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI,
+ const Scaled64 CondCost) {
uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
// Account for the default misprediction rate when using a branch
@@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI,
// Returns the cost of a branch when the prediction is correct.
// TrueCost * TrueProbability + FalseCost * FalseProbability.
ScaledNumber<uint64_t>
-SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
- const SelectInst *SI) {
+SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
+ const SelectInst *SI) {
Scaled64 PredPathCost;
uint64_t TrueWeight, FalseWeight;
if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
@@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
return PredPathCost;
}
-bool SelectOptimize::isSelectKindSupported(SelectInst *SI) {
+bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) {
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
if (VectorCond)
return false;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f26450e9418700..1d5fb122a1ae98 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -79,6 +79,7 @@
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/InterleavedAccess.h"
#include "llvm/CodeGen/SafeStack.h"
+#include "llvm/CodeGen/SelectOptimize.h"
#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/CodeGen/WasmEHPrepare.h"
#include "llvm/CodeGen/WinEHPrepare.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 56449906eb6562..fe8e0e2c8c9b87 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -396,6 +396,7 @@ FUNCTION_PASS("safe-stack", SafeStackPass(TM))
FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass())
FUNCTION_PASS("scalarizer", ScalarizerPass())
FUNCTION_PASS("sccp", SCCPPass())
+FUNCTION_PASS("select-optimize", SelectOptimizePass(TM))
FUNCTION_PASS("separate-const-offset-from-gep",
SeparateConstOffsetFromGEPPass())
FUNCTION_PASS("sink", SinkingPass())
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index f5c1c3c291cb58..638f26298ee26a 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -66,6 +66,9 @@
; CHECK-NEXT: Expand reduction intrinsics
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: TLS Variable Hoist
+; CHECK-NEXT: Post-Dominator Tree Construction
+; CHECK-NEXT: Branch Probability Analysis
+; CHECK-NEXT: Block Frequency Analysis
; CHECK-NEXT: Lazy Branch Probability Analysis
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll b/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll
index 156ec400d5e7f1..22fa8b1005335b 100644
--- a/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll
+++ b/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll
@@ -3,6 +3,10 @@
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=generic -S < %s | FileCheck %s --check-prefix=CHECK-GENERIC
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s
; Test has not predictable select, which should not be transformed to a branch
define i32 @test1(i32 %a) {
diff --git a/llvm/test/CodeGen/AArch64/selectopt-logical.ll b/llvm/test/CodeGen/AArch64/selectopt-logical.ll
index 635922e272c4fc..b8cc896b61d412 100644
--- a/llvm/test/CodeGen/AArch64/selectopt-logical.ll
+++ b/llvm/test/CodeGen/AArch64/selectopt-logical.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s
define i32 @test(ptr nocapture noundef readnone %x, i32 noundef %iters) {
; CHECK-LABEL: @test(
diff --git a/llvm/test/CodeGen/AArch64/selectopt.ll b/llvm/test/CodeGen/AArch64/selectopt.ll
index 46ac585b6d555c..8922eba0406cc7 100644
--- a/llvm/test/CodeGen/AArch64/selectopt.ll
+++ b/llvm/test/CodeGen/AArch64/selectopt.ll
@@ -6,6 +6,13 @@
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s --check-prefix=CHECKOO
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a710 -S < %s | FileCheck %s --check-prefix=CHECKOO
; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s --check-prefix=CHECKOO
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=generic -S < %s | FileCheck %s --check-prefix=CHECKOO
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a55 -S < %s | FileCheck %s --check-prefix=CHECKII
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a510 -S < %s | FileCheck %s --check-prefix=CHECKII
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s --check-prefix=CHECKOO
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s --check-prefix=CHECKOO
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a710 -S < %s | FileCheck %s --check-prefix=CHECKOO
+; RUN: opt -passes='require<profile-summary>,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s --check-prefix=CHECKOO
%struct.st = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 }
diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll
index a53dd36d813e35..6e440654408753 100644
--- a/llvm/test/CodeGen/X86/select-optimize.ll
+++ b/llvm/test/CodeGen/X86/select-optimize.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require<profile-summary>,function(select-optimize)' -S < %s | FileCheck %s
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test base heuristic 1:
More information about the llvm-commits
mailing list