[llvm] Globaloutline2 (PR #105443)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 22:03:26 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/105443
>From a076fd25c26734c1926a4f0d2f585e8f6bf18a21 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Wed, 24 Apr 2024 09:40:34 -0700
Subject: [PATCH 1/3] [MachineOutliner][NFC] Refactor
---
llvm/include/llvm/CodeGen/MachineOutliner.h | 5 +-
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 12 ++++-
llvm/lib/CodeGen/MachineOutliner.cpp | 55 +++++++++++---------
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +--
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 +-
5 files changed, 48 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index eaba6c9b18f2bb..84937a8b563ac0 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -234,11 +234,11 @@ struct OutlinedFunction {
unsigned FrameConstructionID = 0;
/// Return the number of candidates for this \p OutlinedFunction.
- unsigned getOccurrenceCount() const { return Candidates.size(); }
+ virtual unsigned getOccurrenceCount() const { return Candidates.size(); }
/// Return the number of bytes it would take to outline this
/// function.
- unsigned getOutliningCost() const {
+ virtual unsigned getOutliningCost() const {
unsigned CallOverhead = 0;
for (const Candidate &C : Candidates)
CallOverhead += C.getCallOverhead();
@@ -272,6 +272,7 @@ struct OutlinedFunction {
}
OutlinedFunction() = delete;
+ virtual ~OutlinedFunction() = default;
};
} // namespace outliner
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 882cadea223695..a833a541e4e025 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2088,14 +2088,22 @@ class TargetInstrInfo : public MCInstrInfo {
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
/// information for a set of outlining candidates. Returns std::nullopt if the
- /// candidates are not suitable for outlining.
+ /// candidates are not suitable for outlining. \p MinRep is the minimum
+ /// number of times the instruction sequence must be repeated.
virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MipRep) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
+ virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ const MachineModuleInfo &MMI,
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ return getOutliningCandidateInfo(MMI, RepeatedSequenceLocs, /*MipRep=*/2);
+ }
+
/// Optional target hook to create the LLVM IR attributes for the outlined
/// function. If overridden, the overriding function must call the default
/// implementation.
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 4b56a467b8d076..eecf27613a2c31 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -456,8 +456,9 @@ struct MachineOutliner : public ModulePass {
/// \param Mapper Contains outlining mapping information.
/// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
/// each type of candidate.
- void findCandidates(InstructionMapper &Mapper,
- std::vector<OutlinedFunction> &FunctionList);
+ void
+ findCandidates(InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
/// Replace the sequences of instructions represented by \p OutlinedFunctions
/// with calls to functions.
@@ -465,7 +466,9 @@ struct MachineOutliner : public ModulePass {
/// \param M The module we are outlining from.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
+ /// \param[out] OutlinedFunctionNum The outlined function number.
+ bool outline(Module &M,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
/// Creates a function for \p OF and inserts it into the module.
@@ -583,7 +586,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
}
void MachineOutliner::findCandidates(
- InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
@@ -684,7 +688,7 @@ void MachineOutliner::findCandidates(
continue;
}
- FunctionList.push_back(*OF);
+ FunctionList.push_back(std::make_unique<OutlinedFunction>(*OF));
}
}
@@ -827,10 +831,9 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
return &MF;
}
-bool MachineOutliner::outline(Module &M,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper,
- unsigned &OutlinedFunctionNum) {
+bool MachineOutliner::outline(
+ Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
+ InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
<< "\n");
@@ -838,23 +841,23 @@ bool MachineOutliner::outline(Module &M,
// Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
// The function with highest priority should be outlined first.
- stable_sort(FunctionList,
- [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
- return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
- RHS.getNotOutlinedCost() * LHS.getOutliningCost();
- });
+ stable_sort(FunctionList, [](const std::unique_ptr<OutlinedFunction> &LHS,
+ const std::unique_ptr<OutlinedFunction> &RHS) {
+ return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
+ RHS->getNotOutlinedCost() * LHS->getOutliningCost();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
- for (OutlinedFunction &OF : FunctionList) {
+ for (auto &OF : FunctionList) {
#ifndef NDEBUG
- auto NumCandidatesBefore = OF.Candidates.size();
+ auto NumCandidatesBefore = OF->Candidates.size();
#endif
// If we outlined something that overlapped with a candidate in a previous
// step, then we can't outline from it.
- erase_if(OF.Candidates, [&UnsignedVecBegin](Candidate &C) {
+ erase_if(OF->Candidates, [&UnsignedVecBegin](Candidate &C) {
return std::any_of(UnsignedVecBegin + C.getStartIdx(),
UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
return I == static_cast<unsigned>(-1);
@@ -862,36 +865,36 @@ bool MachineOutliner::outline(Module &M,
});
#ifndef NDEBUG
- auto NumCandidatesAfter = OF.Candidates.size();
+ auto NumCandidatesAfter = OF->Candidates.size();
LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
<< "/" << NumCandidatesBefore << " candidates\n");
#endif
// If we made it unbeneficial to outline this function, skip it.
- if (OF.getBenefit() < OutlinerBenefitThreshold) {
- LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF.getBenefit()
+ if (OF->getBenefit() < OutlinerBenefitThreshold) {
+ LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
<< " B) < threshold (" << OutlinerBenefitThreshold
<< " B)\n");
continue;
}
- LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF.getBenefit()
+ LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
<< " B) > threshold (" << OutlinerBenefitThreshold
<< " B)\n");
// It's beneficial. Create the function and outline its sequence's
// occurrences.
- OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
- emitOutlinedFunctionRemark(OF);
+ OF->MF = createOutlinedFunction(M, *OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(*OF);
FunctionsCreated++;
OutlinedFunctionNum++; // Created a function, move to the next name.
- MachineFunction *MF = OF.MF;
+ MachineFunction *MF = OF->MF;
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Replace occurrences of the sequence with calls to the new function.
LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
- for (Candidate &C : OF.Candidates) {
+ for (Candidate &C : OF->Candidates) {
MachineBasicBlock &MBB = *C.getMBB();
MachineBasicBlock::iterator StartIt = C.begin();
MachineBasicBlock::iterator EndIt = std::prev(C.end());
@@ -1180,7 +1183,7 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
// Prepare instruction mappings for the suffix tree.
populateMapper(Mapper, M);
- std::vector<OutlinedFunction> FunctionList;
+ std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
// Find all of the outlining candidates.
findCandidates(Mapper, FunctionList);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 697ae510a95655..156ab6568f833e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -8687,7 +8687,8 @@ static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
std::optional<outliner::OutlinedFunction>
AArch64InstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRep) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
@@ -8801,7 +8802,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRep)
return std::nullopt;
}
@@ -9048,7 +9049,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
}
// If we dropped all of the candidates, bail out here.
- if (RepeatedSequenceLocs.size() < 2) {
+ if (RepeatedSequenceLocs.size() < MinRep) {
RepeatedSequenceLocs.clear();
return std::nullopt;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a1f2fbff016312..762fb9873065e6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -473,7 +473,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool OutlineFromLinkOnceODRs) const override;
std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRep) const override;
void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
>From 8c7c1f3015ce17c146b6f91b0db4fe25d83d37d3 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Thu, 22 Aug 2024 00:08:01 -0700
Subject: [PATCH 2/3] Address comments from ellishg
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 13 ++++---------
llvm/lib/CodeGen/MachineOutliner.cpp | 15 +++++++++------
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 12 ++++++------
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 5 +++--
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 17 +++++++++--------
llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 6 ++++--
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 12 +++++++-----
llvm/lib/Target/RISCV/RISCVInstrInfo.h | 6 ++++--
llvm/lib/Target/X86/X86InstrInfo.cpp | 16 +++++++++-------
llvm/lib/Target/X86/X86InstrInfo.h | 6 ++++--
10 files changed, 59 insertions(+), 49 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index a833a541e4e025..3fc26dd85cb1dd 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2088,22 +2088,17 @@ class TargetInstrInfo : public MCInstrInfo {
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
/// information for a set of outlining candidates. Returns std::nullopt if the
- /// candidates are not suitable for outlining. \p MinRep is the minimum
+ /// candidates are not suitable for outlining. \p MinRepeates is the minimum
/// number of times the instruction sequence must be repeated.
- virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ virtual std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+ getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
- unsigned MipRep) const {
+ unsigned MinRepeates) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
- virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
- const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
- return getOutliningCandidateInfo(MMI, RepeatedSequenceLocs, /*MipRep=*/2);
- }
-
/// Optional target hook to create the LLVM IR attributes for the outlined
/// function. If overridden, the overriding function must call the default
/// implementation.
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index eecf27613a2c31..ed194355f72add 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -674,21 +674,24 @@ void MachineOutliner::findCandidates(
const TargetInstrInfo *TII =
CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
- std::optional<OutlinedFunction> OF =
- TII->getOutliningCandidateInfo(*MMI, CandidatesForRepeatedSeq);
+ unsigned MinRepeates = 2;
+ std::optional<std::unique_ptr<OutlinedFunction>> OF =
+ TII->getOutliningCandidateInfo(*MMI, CandidatesForRepeatedSeq,
+ MinRepeates);
// If we deleted too many candidates, then there's nothing worth outlining.
// FIXME: This should take target-specified instruction sizes into account.
- if (!OF || OF->Candidates.size() < 2)
+ if (!OF.has_value() || OF.value()->Candidates.size() < MinRepeates)
continue;
// Is it better to outline this candidate than not?
- if (OF->getBenefit() < OutlinerBenefitThreshold) {
- emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, *OF);
+ if (OF.value()->getBenefit() < OutlinerBenefitThreshold) {
+ emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq,
+ *OF.value());
continue;
}
- FunctionList.push_back(std::make_unique<OutlinedFunction>(*OF));
+ FunctionList.emplace_back(std::move(OF.value()));
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 156ab6568f833e..934cd2b72024c7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -8684,11 +8684,11 @@ static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
-std::optional<outliner::OutlinedFunction>
+std::optional<std::unique_ptr<outliner::OutlinedFunction>>
AArch64InstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
- unsigned MinRep) const {
+ unsigned MinRepeates) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
@@ -8802,7 +8802,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < MinRep)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
}
@@ -9049,7 +9049,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
}
// If we dropped all of the candidates, bail out here.
- if (RepeatedSequenceLocs.size() < MinRep) {
+ if (RepeatedSequenceLocs.size() < MinRepeates) {
RepeatedSequenceLocs.clear();
return std::nullopt;
}
@@ -9092,8 +9092,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
if (FrameID != MachineOutlinerTailCall && CFICount > 0)
return std::nullopt;
- return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
- NumBytesToCreateFrame, FrameID);
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
}
void AArch64InstrInfo::mergeOutliningCandidateAttributes(
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 762fb9873065e6..4814d394e30a03 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -471,10 +471,11 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
- std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+ getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
- unsigned MinRep) const override;
+ unsigned MinRepeates) const override;
void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1199052ca97e9c..c3fc8b3f1c9bb9 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5871,10 +5871,11 @@ static bool isLRAvailable(const TargetRegisterInfo &TRI,
return !Live;
}
-std::optional<outliner::OutlinedFunction>
+std::optional<std::unique_ptr<outliner::OutlinedFunction>>
ARMBaseInstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
@@ -5915,7 +5916,7 @@ ARMBaseInstrInfo::getOutliningCandidateInfo(
llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
}
@@ -5941,7 +5942,7 @@ ARMBaseInstrInfo::getOutliningCandidateInfo(
else
RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
// Likewise, partition the candidates according to PAC-RET enablement.
@@ -5958,7 +5959,7 @@ ARMBaseInstrInfo::getOutliningCandidateInfo(
else
RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
// At this point, we have only "safe" candidates to outline. Figure out
@@ -6062,7 +6063,7 @@ ARMBaseInstrInfo::getOutliningCandidateInfo(
RepeatedSequenceLocs.size() * Costs.CallDefault) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
FrameID = MachineOutlinerNoLRSave;
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
} else
SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
@@ -6088,8 +6089,8 @@ ARMBaseInstrInfo::getOutliningCandidateInfo(
NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
}
- return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
- NumBytesToCreateFrame, FrameID);
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
}
bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 8521e3ef91399a..b0ac6479ab6fdc 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -355,9 +355,11 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
/// ARM supports the MachineOutliner.
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
- std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+ getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const override;
void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 9dd79027d7a162..766ee16393e6fb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2828,10 +2828,11 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}
-std::optional<outliner::OutlinedFunction>
+std::optional<std::unique_ptr<outliner::OutlinedFunction>>
RISCVInstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const {
// First we need to filter out candidates where the X5 register (IE t0) can't
// be used to setup the function call.
@@ -2843,7 +2844,7 @@ RISCVInstrInfo::getOutliningCandidateInfo(
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
// If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRepeates)
return std::nullopt;
unsigned SequenceSize = 0;
@@ -2864,8 +2865,9 @@ RISCVInstrInfo::getOutliningCandidateInfo(
.hasStdExtCOrZca())
FrameOverhead = 2;
- return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
- FrameOverhead, MachineOutlinerDefault);
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize, FrameOverhead,
+ MachineOutlinerDefault);
}
outliner::InstrType
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index ecb7982b3e5e36..e431ae30839297 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -205,9 +205,11 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
// Calculate target-specific information for a set of outlining candidates.
- std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+ getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const override;
// Return if/how a given MachineInstr should be outlined.
virtual outliner::InstrType
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 39ba7ea777909c..087e869c28fa3a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10521,10 +10521,11 @@ FunctionPass *llvm::createCleanupLocalDynamicTLSPass() {
///
enum MachineOutlinerClass { MachineOutlinerDefault, MachineOutlinerTailCall };
-std::optional<outliner::OutlinedFunction>
+std::optional<std::unique_ptr<outliner::OutlinedFunction>>
X86InstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0]) {
// FIXME: x86 doesn't implement getInstSizeInBytes, so
@@ -10561,9 +10562,10 @@ X86InstrInfo::getOutliningCandidateInfo(
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerTailCall, 1);
- return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
- 0, // Number of bytes to emit frame.
- MachineOutlinerTailCall // Type of frame.
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize,
+ 0, // Number of bytes to emit frame.
+ MachineOutlinerTailCall // Type of frame.
);
}
@@ -10573,8 +10575,8 @@ X86InstrInfo::getOutliningCandidateInfo(
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, 1);
- return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 1,
- MachineOutlinerDefault);
+ return std::make_unique<outliner::OutlinedFunction>(
+ RepeatedSequenceLocs, SequenceSize, 1, MachineOutlinerDefault);
}
bool X86InstrInfo::isFunctionSafeToOutlineFrom(
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 3100a9e5699f0a..8593430d780d39 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -584,9 +584,11 @@ class X86InstrInfo final : public X86GenInstrInfo {
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ std::optional<std::unique_ptr<outliner::OutlinedFunction>>
+ getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRepeates) const override;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
>From 7b067efffa12d7b4d037b1b867bbf9dbf247927c Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Wed, 24 Apr 2024 11:26:23 -0700
Subject: [PATCH 3/3] [CGData][MachineOutliner] Global Outlining2
This commit introduces support for outlining functions across modules using codegen data produced by a previous codegen run. The codegen data currently holds the outlined hash tree, which records the outlining instances that previously occurred locally.
The machine outliner now operates in one of three modes:
1. CGDataMode::None: This is the default outliner mode that uses the suffix tree to identify (local) outlining candidates within a module. This mode is also used by (full)LTO to maintain optimal behavior with the combined module.
2. CGDataMode::Write (`codegen-data-generate`): This mode is identical to the default mode, but it also publishes the stable hash sequences of instructions in the outlined functions into a local outlined hash tree. It then encodes this into the `__llvm_outline` section, which will be dead-stripped at link time.
3. CGDataMode::Read (`codegen-data-use-path={.cgdata}`): This mode reads a codegen data file (.cgdata) and initializes a global outlined hash tree. This tree is used to generate global outlining candidates. Note that the codegen data file is produced by post-processing the raw `__llvm_outline` sections from all native objects with the `llvm-cgdata` tool (or, later, with a linker such as `LLD` or a new ThinLTO pipeline).
---
llvm/include/llvm/CodeGen/MachineOutliner.h | 36 +++
llvm/lib/CGData/CodeGenData.cpp | 26 +-
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/MachineOutliner.cpp | 260 +++++++++++++++++-
llvm/lib/CodeGen/MachineStableHash.cpp | 19 +-
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 +
.../CodeGen/AArch64/cgdata-global-hash.ll | 40 +++
.../CodeGen/AArch64/cgdata-outlined-name.ll | 41 +++
.../AArch64/cgdata-read-double-outline.ll | 57 ++++
.../AArch64/cgdata-read-lto-outline.ll | 96 +++++++
.../CodeGen/AArch64/cgdata-read-priority.ll | 68 +++++
.../AArch64/cgdata-read-single-outline.ll | 42 +++
.../CodeGen/AArch64/cgdata-write-outline.ll | 51 ++++
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 +
14 files changed, 734 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 84937a8b563ac0..3bda86f399b7b5 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineStableHash.h"
#include <initializer_list>
namespace llvm {
@@ -274,6 +275,41 @@ struct OutlinedFunction {
OutlinedFunction() = delete;
virtual ~OutlinedFunction() = default;
};
+
+/// The information necessary to create an outlined function that is matched
+/// globally; it reports a global occurrence count instead of the local one.
+struct GlobalOutlinedFunction : public OutlinedFunction {
+ GlobalOutlinedFunction(OutlinedFunction &OF, unsigned GlobalOccurrenceCount)
+ : OutlinedFunction(OF.Candidates, OF.SequenceSize, OF.FrameOverhead,
+ OF.FrameConstructionID),
+ GlobalOccurrenceCount(GlobalOccurrenceCount) {}
+
+ unsigned GlobalOccurrenceCount; ///< Occurrence count supplied at construction.
+
+ /// Return the global occurrence count of this sequence, or 0 when no
+ /// candidate is left. Each global match creates its own unique candidate,
+ /// but that candidate may be erased when it overlaps with a previously
+ /// outlined instance.
+ unsigned getOccurrenceCount() const override {
+ assert(Candidates.size() <= 1);
+ return Candidates.empty() ? 0 : GlobalOccurrenceCount;
+ }
+
+ /// Return the outlining cost: the call overhead of the first (unique)
+ /// candidate scaled by getOccurrenceCount(), plus SequenceSize and FrameOverhead.
+ unsigned getOutliningCost() const override {
+ assert(Candidates.size() <= 1);
+ unsigned CallOverhead =
+ Candidates.empty()
+ ? 0
+ : Candidates[0].getCallOverhead() * getOccurrenceCount();
+ return CallOverhead + SequenceSize + FrameOverhead;
+ }
+
+ GlobalOutlinedFunction() = delete;
+ ~GlobalOutlinedFunction() = default;
+};
+
} // namespace outliner
} // namespace llvm
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 9dd4b1674e094a..55d2504231c744 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -24,6 +24,13 @@
using namespace llvm;
using namespace cgdata;
+cl::opt<bool>
+ CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
+ cl::desc("Emit CodeGen Data into custom sections"));
+cl::opt<std::string>
+ CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
+ cl::desc("File path to where .cgdata file is read"));
+
static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
std::string Msg;
@@ -132,7 +139,24 @@ CodeGenData &CodeGenData::getInstance() {
std::call_once(CodeGenData::OnceFlag, []() {
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
- // TODO: Initialize writer or reader mode for the client optimization.
+ if (CodeGenDataGenerate)
+ Instance->EmitCGData = true;
+ else if (!CodeGenDataUsePath.empty()) {
+ // Initialize the global CGData if the input file name is given.
+ // We do not error-out when failing to parse the input file.
+ // Instead, just emit a warning message and fall back as if no CGData
+ // were available.
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
+ if (Error E = ReaderOrErr.takeError()) {
+ warn(std::move(E), CodeGenDataUsePath);
+ return;
+ }
+ // Publish each CGData based on the data type in the header.
+ auto Reader = ReaderOrErr->get();
+ if (Reader->hasOutlinedHashTree())
+ Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
+ }
});
return *(Instance.get());
}
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f1607f85c5b319..3e75737185c3ee 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -267,6 +267,7 @@ add_llvm_component_library(LLVMCodeGen
Analysis
BitReader
BitWriter
+ CGData
CodeGenTypes
Core
MC
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index ed194355f72add..f641e86da545ec 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,7 +59,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -75,6 +77,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <functional>
#include <tuple>
#include <vector>
@@ -98,6 +101,10 @@ STATISTIC(NumInvisible,
"Invisible instructions skipped during mapping");
STATISTIC(UnsignedVecSize,
"Total number of instructions mapped and saved to mapping vector");
+STATISTIC(StableHashAttempts,
+ "Count of hashing attempts made for outlined functions");
+STATISTIC(StableHashDropped,
+ "Count of unsuccessful hashing attempts for outlined functions");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -128,6 +135,19 @@ static cl::opt<bool> OutlinerLeafDescendants(
"tree as candidates for outlining (if false, only leaf children "
"are considered)"));
+static cl::opt<bool>
+ DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
+ cl::desc("Disable global outlining only by ignoring "
+ "the codegen data generation or use"),
+ cl::init(false));
+
+static cl::opt<bool> AppendContentHashToOutlinedName(
+ "append-content-hash-outlined-name", cl::Hidden,
+ cl::desc("This appends the content hash to the globally outlined function "
+ "name. It's beneficial for enhancing the precision of the stable "
+ "hash and for ordering the outlined functions."),
+ cl::init(true));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -421,11 +441,29 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
+ /// This is a compact representation of hash sequences of outlined functions.
+ /// It is used when OutlinerMode = CGDataMode::Write.
+ /// The resulting hash tree will be emitted into the __llvm_outline section,
+ /// which will be dead-stripped and not go into the final binary.
+ /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
+ /// a global outlined hash tree for the subsequent codegen.
+ std::unique_ptr<OutlinedHashTree> LocalHashTree;
+
+ /// The mode of the outliner.
+ /// When it's CGDataMode::None, candidates are populated with the suffix tree
+ /// within a module and outlined.
+ /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
+ /// sequences of outlined functions are published into LocalHashTree.
+ /// When it's CGDataMode::Read, candidates are populated with the global
+ /// outlined hash tree that has been built by the previous codegen.
+ CGDataMode OutlinerMode = CGDataMode::None;
+
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.addRequired<ImmutableModuleSummaryIndexWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -460,6 +498,16 @@ struct MachineOutliner : public ModulePass {
findCandidates(InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+ /// Find all repeated substrings that match in the global outlined hash
+ /// tree built from the previous codegen.
+ ///
+ /// \param Mapper Contains outlining mapping information.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
+ /// each type of candidate.
+ void findGlobalCandidates(
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+
/// Replace the sequences of instructions represented by \p OutlinedFunctions
/// with calls to functions.
///
@@ -476,6 +524,17 @@ struct MachineOutliner : public ModulePass {
InstructionMapper &Mapper,
unsigned Name);
+ /// Compute and publish the stable hash sequence of instructions in the
+ /// outlined function, \p MF. The parameter \p CandSize represents the number
+ /// of candidates that have identical instruction sequences to \p MF.
+ void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
+
+ /// Initialize the outliner mode.
+ void initializeOutlinerMode(const Module &M);
+
+ /// Emit the outlined hash tree into __llvm_outline section.
+ void emitOutlinedHashTree(Module &M);
+
/// Calls 'doOutline()' 1 + OutlinerReruns times.
bool runOnModule(Module &M) override;
@@ -585,6 +644,109 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
+struct MatchedEntry { // One match of an instruction run against the hash tree.
+ size_t StartIdx; // Index in the mapper's instruction list where the match starts.
+ size_t Length; // Index span of the match, from its first to its last entry.
+ size_t Count; // Terminal count stored on the hash node where the match ends.
+};
+
+static const HashNode *followHashNode(stable_hash StableHash,
+                                      const HashNode *Current) {
+  auto Next = Current->Successors.find(StableHash);
+  return (Next != Current->Successors.end()) ? Next->second.get() : nullptr;
+}
+
+// Find all matches in the global outlined hash tree.
+// Each instruction index is tried as the start of a match; the walk follows
+// tree successors and records {start index, length, terminal count} at every
+// node that carries a terminal count.
+// It's quadratic complexity in theory, but it's nearly linear in practice
+// since the length of outlined sequences is small within a block.
+static std::vector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
+  auto &InstrList = Mapper.InstrList;
+  auto &UnsignedVec = Mapper.UnsignedVec;
+
+  std::vector<MatchedEntry> MatchedEntries;
+  auto Size = UnsignedVec.size();
+
+  // Get the global outlined hash tree built from the previous run.
+  assert(cgdata::hasOutlinedHashTree());
+  const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
+  for (size_t I = 0; I < Size; ++I) {
+    // skip the invalid mapping that represents a large negative value.
+    if (UnsignedVec[I] >= Size)
+      continue;
+    const MachineInstr &MI = *InstrList[I];
+    // skip debug instructions as we did for the outlined function.
+    if (MI.isDebugInstr())
+      continue;
+    // skip the empty hash value.
+    stable_hash StableHashI = stableHashValue(MI);
+    if (!StableHashI)
+      continue;
+
+    // A match can only start at a direct successor of the root.
+    const HashNode *LastNode = followHashNode(StableHashI, RootNode);
+    if (!LastNode)
+      continue;
+
+    size_t J = I + 1;
+    for (; J < Size; ++J) {
+      // break on the invalid mapping that represents a large negative value.
+      if (UnsignedVec[J] >= Size)
+        break;
+      // ignore debug instructions as we did for the outlined function.
+      const MachineInstr &MJ = *InstrList[J];
+      if (MJ.isDebugInstr())
+        continue;
+      // break on the empty hash value.
+      stable_hash StableHashJ = stableHashValue(MJ);
+      if (!StableHashJ)
+        break;
+      LastNode = followHashNode(StableHashJ, LastNode);
+      if (!LastNode)
+        break;
+
+      // Even with a match ending with a terminal, we continue finding
+      // matches to populate all candidates.
+      auto Count = LastNode->Terminals;
+      if (Count)
+        MatchedEntries.push_back({I, J - I + 1, *Count});
+    }
+  }
+
+  return MatchedEntries;
+}
+
+void MachineOutliner::findGlobalCandidates(
+    InstructionMapper &Mapper,
+    std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
+  FunctionList.clear();
+  auto &Instrs = Mapper.InstrList;
+  auto &BlockFlags = Mapper.MBBFlagsMap;
+
+  std::vector<Candidate> SingleCandidate;
+  for (auto &Hit : getMatchedEntries(Mapper)) {
+    SingleCandidate.clear();
+    MachineBasicBlock::iterator FirstIt = Instrs[Hit.StartIdx];
+    MachineBasicBlock::iterator LastIt = Instrs[Hit.StartIdx + Hit.Length - 1];
+    MachineBasicBlock *Parent = FirstIt->getParent();
+    Candidate Cand(Hit.StartIdx, Hit.Length, FirstIt, LastIt, Parent,
+                   FunctionList.size(), BlockFlags[Parent]);
+    SingleCandidate.push_back(Cand);
+    const TargetInstrInfo *TII = Cand.getMF()->getSubtarget().getInstrInfo();
+    std::optional<OutlinedFunction> OF = TII->getOutliningCandidateInfo(
+        *MMI, SingleCandidate, /*MinRep=*/1);
+    if (!OF || OF->Candidates.empty())
+      continue;
+    // Each match yields its own outlined function holding a single candidate.
+    assert(OF->Candidates.size() == 1);
+
+    FunctionList.push_back(
+        std::make_unique<GlobalOutlinedFunction>(*OF, Hit.Count));
+  }
+}
+
void MachineOutliner::findCandidates(
InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
@@ -695,6 +857,40 @@ void MachineOutliner::findCandidates(
}
}
+void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
+                                                    unsigned CandSize) {
+  // Compute the hash sequence for the outlined function. An unhashable (zero)
+  // instruction in any block voids the whole sequence, not just that block.
+  SmallVector<stable_hash> OutlinedHashSequence;
+  bool HasUnhashable = false;
+  for (auto &MBB : MF) {
+    for (auto &NewMI : MBB) {
+      OutlinedHashSequence.push_back(stableHashValue(NewMI));
+      HasUnhashable |= !OutlinedHashSequence.back();
+    }
+  }
+  if (HasUnhashable)
+    OutlinedHashSequence.clear();
+
+  // Append a unique name based on the non-empty hash sequence.
+  if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
+    auto CombinedHash = stable_hash_combine_range(OutlinedHashSequence.begin(),
+                                                  OutlinedHashSequence.end());
+    auto NewName =
+        MF.getName().str() + ".content." + std::to_string(CombinedHash);
+    MF.getFunction().setName(NewName);
+  }
+
+  // Publish the non-empty hash sequence to the local hash tree.
+  if (OutlinerMode == CGDataMode::Write) {
+    StableHashAttempts++;
+    if (!OutlinedHashSequence.empty())
+      LocalHashTree->insert({OutlinedHashSequence, CandSize});
+    else
+      StableHashDropped++;
+  }
+}
+
MachineFunction *MachineOutliner::createOutlinedFunction(
Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
@@ -770,6 +966,9 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
}
}
+ if (OutlinerMode != CGDataMode::None)
+ computeAndPublishHashSequence(MF, OF.Candidates.size());
+
// Set normal properties for a late MachineFunction.
MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
@@ -1134,12 +1333,65 @@ void MachineOutliner::emitInstrCountChangedRemark(
}
}
+void MachineOutliner::initializeOutlinerMode(const Module &M) {
+  if (DisableGlobalOutlining)
+    return;
+
+  // (Full)LTO module does not have functions added to the index.
+  // In this case, we run the outliner without using codegen data as usual.
+  if (auto *SummaryPass =
+          getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>()) {
+    auto *Index = SummaryPass->getIndex();
+    if (Index && !Index->hasExportedFunctions(M))
+      return;
+  }
+
+  // Writing takes precedence over reading: when codegen data emission is
+  // enabled, build a local outlined hash tree destined for the custom
+  // `__llvm_outline` section and ignore any tree from a previous codegen.
+  if (cgdata::emitCGData()) {
+    OutlinerMode = CGDataMode::Write;
+    LocalHashTree.reset(new OutlinedHashTree());
+    return;
+  }
+  // Otherwise read the previous codegen's tree, if any, for global candidates.
+  if (cgdata::hasOutlinedHashTree())
+    OutlinerMode = CGDataMode::Read;
+}
+
+void MachineOutliner::emitOutlinedHashTree(Module &M) {
+  assert(LocalHashTree);
+  if (LocalHashTree->empty())
+    return;
+
+  LLVM_DEBUG({
+    dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
+           << "\n";
+  });
+  // Serialize the tree (taking ownership of it) into an in-memory buffer.
+  SmallVector<char> Bytes;
+  raw_svector_ostream Stream(Bytes);
+  OutlinedHashTreeRecord Record(std::move(LocalHashTree));
+  Record.serialize(Stream);
+
+  // Embed the serialized bytes under the CG_outline codegen-data section name.
+  llvm::StringRef Serialized(Bytes.data(), Bytes.size());
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+      Serialized, "in-memory outlined hash tree", false);
+  Triple TT(M.getTargetTriple());
+  embedBufferInModule(
+      M, *Buffer, getCodeGenDataSectionName(CG_outline, TT.getObjectFormat()));
+}
+
bool MachineOutliner::runOnModule(Module &M) {
// Check if there's anything in the module. If it's empty, then there's
// nothing to outline.
if (M.empty())
return false;
+ // Initialize the outliner mode.
+ initializeOutlinerMode(M);
+
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// Number to append to the current outlined function.
@@ -1161,6 +1413,9 @@ bool MachineOutliner::runOnModule(Module &M) {
}
}
+ if (OutlinerMode == CGDataMode::Write)
+ emitOutlinedHashTree(M);
+
return true;
}
@@ -1189,7 +1444,10 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
// Find all of the outlining candidates.
- findCandidates(Mapper, FunctionList);
+ if (OutlinerMode == CGDataMode::Read)
+ findGlobalCandidates(Mapper, FunctionList);
+ else
+ findCandidates(Mapper, FunctionList);
// If we've requested size remarks, then collect the MI counts of every
// function before outlining, and the MI counts after outlining.
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..d639dd9a6bd015 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,9 +95,22 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Metadata:
StableHashBailingMetadataUnsupported++;
return 0;
- case MachineOperand::MO_GlobalAddress:
- StableHashBailingGlobalAddress++;
- return 0;
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    if (GV->hasPrivateLinkage() || !GV->hasName()) {
+      StableHashBailingGlobalAddress++;
+      return 0;
+    }
+    auto Name = GV->getName();
+    // Use the content hash of the outlined function. The suffix is searched as
+    // a whole substring; find_last_of() matches any one character of it.
+    if (auto Pos = Name.rfind(".content."); Pos != StringRef::npos) {
+      assert(Name.starts_with("OUTLINED_FUNCTION"));
+      Name = Name.substr(Pos);
+    }
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               xxh3_64bits(Name), MO.getOffset());
+  }
case MachineOperand::MO_TargetIndex: {
if (const char *Name = MO.getTargetIndexName())
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 3465b717261cf5..66ce960462c63d 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -16,6 +16,7 @@
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
+; CHECK-NEXT: Module summary info
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll b/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
new file mode 100644
index 00000000000000..c425eda56f5d5b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
@@ -0,0 +1,40 @@
+; This test verifies the stable hash values for different global variables
+; that have distinct names.
+; We generate two different cgdata files from nearly identical outline instances,
+; with the only difference being the last call target globals, @g vs @h.
+
+; RUN: split-file %s %t
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-g.ll -o %t/local-g.o
+; RUN: llvm-cgdata --merge %t/local-g.o -o %t/local-g.cgdata
+; RUN: llvm-cgdata --convert %t/local-g.cgdata -o %t/local-g.cgtext
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-h.ll -o %t/local-h.o
+; RUN: llvm-cgdata --merge %t/local-h.o -o %t/local-h.cgdata
+; RUN: llvm-cgdata --convert %t/local-h.cgdata -o %t/local-h.cgtext
+
+; We compare the trees which are only different at the terminal node's hash value.
+; Here we simply count the different lines that have `Hash` string.
+; RUN: not diff %t/local-g.cgtext %t/local-h.cgtext 2>&1 | grep Hash | wc -l | FileCheck %s
+; CHECK: 2
+
+;--- local-g.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-h.ll
+declare i32 @h(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @h(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @h(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll b/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
new file mode 100644
index 00000000000000..69f1ecd6515e7e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
@@ -0,0 +1,41 @@
+; This test verifies the globally outlined function name has the content hash.
+
+; RUN: split-file %s %t
+
+; Check if the outlined function name has the content hash depending on the flag.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -append-content-hash-outlined-name=false -filetype=obj %t/local-two.ll -o %t_write_base
+; RUN: llvm-objdump -d %t_write_base | FileCheck %s --check-prefix=BASE
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -append-content-hash-outlined-name=true -filetype=obj %t/local-two.ll -o %t_write_suffix
+; RUN: llvm-objdump -d %t_write_suffix | FileCheck %s --check-prefix=SUFFIX
+; BASE-NOT: _OUTLINED_FUNCTION_{{.*}}.content.{{[0-9]+}}
+; SUFFIX: _OUTLINED_FUNCTION_{{.*}}.content.{{[0-9]+}}
+
+; Generate the cgdata file from each case and show they are identical.
+; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
+; RUN: llvm-cgdata --merge %t_write_suffix -o %t_cgdata_suffix
+; RUN: diff %t_cgdata_base %t_cgdata_suffix
+
+; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
+; Check if the outlined function has the content hash depending on the flag.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base
+; RUN: llvm-objdump -d %t_read_base | FileCheck %s --check-prefix=BASE
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_suffix -append-content-hash-outlined-name=true -filetype=obj %t/local-one.ll -o %t_read_suffix
+; RUN: llvm-objdump -d %t_read_suffix | FileCheck %s --check-prefix=SUFFIX
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-one.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
new file mode 100644
index 00000000000000..6e027308c17068
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
@@ -0,0 +1,57 @@
+; This test demonstrates how identical instruction sequences are handled during global outlining.
+; Currently, we do not attempt to share an outlined function for identical sequences.
+; Instead, each instruction sequence that matches against the global outlined hash tree
+; is outlined into its own unique function.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from a local outline instance present in local-two.ll.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-two.ll -o %t_write
+; RUN: llvm-cgdata --merge %t_write -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we read the cgdata for local-two-another.ll and proceed to optimistically outline
+; each instruction sequence that matches against the global outlined hash tree.
+; Since each matching sequence is considered a candidate, we expect to generate two
+; unique outlined functions. These functions, although unique, will be identical in code,
+; and thus, will be folded by the linker.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/local-two-another.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-two-another.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
new file mode 100644
index 00000000000000..f1a5d1a0ccc7f0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
@@ -0,0 +1,96 @@
+; This test is similar to cgdata-read-double-outline.ll, but it is executed with LTO (Link Time Optimization).
+; It demonstrates how identical instruction sequences are handled during global outlining.
+; Currently, we do not attempt to reuse an outlined function for identical sequences.
+; Instead, each instruction sequence that appears in the global outlined hash tree
+; is outlined into its own unique function.
+
+; RUN: split-file %s %t
+
+; We first create the cgdata file from a local outline instance in local-two.ll
+; RUN: opt -module-summary %t/local-two.ll -o %t/write.bc
+; RUN: llvm-lto2 run %t/write.bc -o %t/write \
+; RUN: -r %t/write.bc,_f1,px -r %t/write.bc,_f2,px -r %t/write.bc,_g,p \
+; RUN: -codegen-data-generate=true
+; RUN: llvm-cgdata --merge %t/write.1 -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we execute either ThinLTO or LTO by reading the cgdata for local-two-another.ll.
+; With ThinLTO, similar to the no-LTO scenario shown in cgdata-read-double-outline.ll,
+; it optimistically outlines each instruction sequence that matches against
+; the global outlined hash tree. Since each matching sequence is considered a candidate,
+; we expect to generate two unique outlined functions that will be folded
+; by the linker at a later stage.
+; However, with LTO, we do not utilize the cgdata, but instead fall back to the default
+; outliner mode. This results in a single outlined function that is
+; shared across two call-sites.
+
+; Run ThinLTO
+; RUN: opt -module-summary %t/local-two-another.ll -o %t/thinlto.bc
+; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto \
+; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \
+; RUN: -codegen-data-use-path=%t_cgdata
+; RUN: llvm-objdump -d %t/thinlto.1 | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+; Run ThinLTO while disabling the global outliner.
+; We have a single outlined case with the default outliner.
+; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto-disable \
+; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \
+; RUN: -enable-machine-outliner \
+; RUN: -codegen-data-use-path=%t_cgdata \
+; RUN: -disable-global-outlining
+; RUN: llvm-objdump -d %t/thinlto-disable.1 | FileCheck %s --check-prefix=DISABLE
+
+; DISABLE: _OUTLINED_FUNCTION_{{.*}}:
+; DISABLE-NEXT: mov
+; DISABLE-NEXT: mov
+; DISABLE-NEXT: b
+; DISABLE-NOT: _OUTLINED_FUNCTION_{{.*}}:
+
+; Run LTO, which effectively disables the global outliner.
+; RUN: opt %t/local-two-another.ll -o %t/lto.bc
+; RUN: llvm-lto2 run %t/lto.bc -o %t/lto \
+; RUN: -r %t/lto.bc,_f3,px -r %t/lto.bc,_f4,px -r %t/lto.bc,_g,p \
+; RUN: -enable-machine-outliner \
+; RUN: -codegen-data-use-path=%t_cgdata
+; RUN: llvm-objdump -d %t/lto.0 | FileCheck %s --check-prefix=DISABLE
+
+;--- local-two.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-two-another.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll b/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
new file mode 100644
index 00000000000000..affeea8c71acd3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
@@ -0,0 +1,68 @@
+; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
+; using codegen data that has been read from a previous codegen run.
+; When multiple matches occur, we prioritize the candidates using the global frequency.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from local outline instances present in write1.ll and write2.ll
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write1.ll -o %t_write1
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write2.ll -o %t_write2
+; RUN: llvm-cgdata --merge %t_write1 %t_write2 -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 8
+; SHOW-NEXT: Terminal Node Count: 2
+; SHOW-NEXT: Depth: 4
+
+; Now, we read the cgdata in the machine outliner, enabling us to optimistically
+; outline a singleton instance in read.ll that matches against the cgdata.
+; There are two matches -- (1) (mov #1, mov #2, mov #3, b) and (2) (mov #2, mov #3, b).
+; Even though sequence (1) is longer than sequence (2), the latter is outlined because it occurs more frequently in the outlined hash tree.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/read.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- write1.ll
+; The sequence (mov #2, mov #3, b) is repeated 4 times.
+declare i32 @g(i32, i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 50, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 60, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 70, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 80, i32 2, i32 3);
+ ret i32 %1
+}
+
+;--- write2.ll
+; The sequence (mov #1, mov #2, mov #3, b) is repeated 2 times.
+declare i32 @g(i32, i32, i32, i32)
+define i32 @f6() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f7() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
+
+;--- read.ll
+declare i32 @g(i32, i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
new file mode 100644
index 00000000000000..7725648a6bc3d5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
@@ -0,0 +1,42 @@
+; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
+; using codegen data that has been read from a previous codegen run.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from a local outline instance present in local-two.ll.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-two.ll -o %t_write
+; RUN: llvm-cgdata --merge %t_write -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we read the cgdata in the machine outliner, enabling us to optimistically
+; outline a singleton instance in local-one.ll that matches against the cgdata.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/local-one.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-one.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
new file mode 100644
index 00000000000000..09ad499190ee37
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
@@ -0,0 +1,51 @@
+; This test verifies that an outlined function is encoded into the __llvm_outline section
+; when the -codegen-data-generate flag is used.
+
+; Verify that the outlined function is created either way, but is encoded into the section only when -codegen-data-generate=true.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %s -o %t_save
+; RUN: llvm-objdump -d %t_save | FileCheck %s
+; RUN: llvm-objdump -h %t_save | FileCheck %s --check-prefix=SECTNAME
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=false -filetype=obj %s -o %t_nosave
+; RUN: llvm-objdump -d %t_nosave | FileCheck %s
+; RUN: llvm-objdump -h %t_nosave | FileCheck %s --check-prefix=NOSECTNAME
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+; SECTNAME: __llvm_outline
+; NOSECTNAME-NOT: __llvm_outline
+
+; Verify the cgdata content after merging with llvm-cgdata; non-root hash values are not pinned, only their 0x-prefixed hex form is checked.
+; RUN: llvm-cgdata --merge %t_save -o %t_cgdata
+; RUN: llvm-cgdata --convert %t_cgdata | FileCheck %s --check-prefix=TREE
+
+; TREE: :outlined_hash_tree
+; TREE: ---
+; TREE-NEXT: 0:
+; TREE-NEXT: Hash: 0x0
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 1 ]
+; TREE-NEXT: 1:
+; TREE-NEXT: Hash: 0x{{[0-9A-Fa-f]+}}
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 2 ]
+; TREE-NEXT: 2:
+; TREE-NEXT: Hash: 0x{{[0-9A-Fa-f]+}}
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 3 ]
+; TREE-NEXT: 3:
+; TREE-NEXT: Hash: 0x{{[0-9A-Fa-f]+}}
+; TREE-NEXT: Terminals: 2
+; TREE-NEXT: SuccessorIds: [ ]
+; TREE-NEXT: ...
+
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 44c270fdc3c257..7749f0db0c54d3 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -20,6 +20,7 @@
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
+; CHECK-NEXT: Module summary info
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
More information about the llvm-commits mailing list