[llvm] Globaloutline2 (PR #105443)
Kyungwoo Lee via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 09:35:31 PDT 2024
https://github.com/kyulee-com updated https://github.com/llvm/llvm-project/pull/105443
>From ad8e93070836da94df51c6d15207b7d1bc80f781 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 23 Aug 2024 08:58:33 -0700
Subject: [PATCH 1/3] [StableHash][NFC] Implement with xxh3_64bits
---
llvm/include/llvm/ADT/StableHashing.h | 70 ++++----------------------
llvm/lib/CodeGen/MachineOperand.cpp | 3 +-
llvm/lib/CodeGen/MachineStableHash.cpp | 27 ++++------
3 files changed, 22 insertions(+), 78 deletions(-)
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index f675f828f702e5..a5b655a10f6996 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -16,6 +16,7 @@
#define LLVM_ADT_STABLEHASHING_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/xxhash.h"
namespace llvm {
@@ -23,78 +24,29 @@ namespace llvm {
/// deserialized, and is stable across processes and executions.
using stable_hash = uint64_t;
-// Implementation details
-namespace hashing {
-namespace detail {
-
-// Stable hashes are based on the 64-bit FNV-1 hash:
-// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
-
-const uint64_t FNV_PRIME_64 = 1099511628211u;
-const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
-
-inline void stable_hash_append(stable_hash &Hash, const char Value) {
- Hash = Hash ^ (Value & 0xFF);
- Hash = Hash * FNV_PRIME_64;
-}
-
-inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
- for (unsigned I = 0; I < 8; ++I) {
- stable_hash_append(Hash, static_cast<char>(Value));
- Value >>= 8;
- }
+inline stable_hash stable_hash_combine(ArrayRef<stable_hash> Buffer) {
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(Buffer.data()); // Byte view of Buffer.
+ size_t Size = Buffer.size() * sizeof(stable_hash); // Length of Buffer in bytes.
+ return xxh3_64bits(ArrayRef<uint8_t>(Ptr, Size)); // NOTE(review): hashes raw in-memory bytes, so the value presumably depends on host byte order - confirm.
}
-} // namespace detail
-} // namespace hashing
-
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- return Hash;
+ stable_hash Hashes[2] = {A, B};
+ return stable_hash_combine(Hashes);
}
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
stable_hash C) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- hashing::detail::stable_hash_append(Hash, C);
- return Hash;
+ stable_hash Hashes[3] = {A, B, C};
+ return stable_hash_combine(Hashes);
}
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
stable_hash C, stable_hash D) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- hashing::detail::stable_hash_append(Hash, C);
- hashing::detail::stable_hash_append(Hash, D);
- return Hash;
-}
-
-/// Compute a stable_hash for a sequence of values.
-///
-/// This hashes a sequence of values. It produces the same stable_hash as
-/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
-/// sequences and is significantly faster given pointers and types which
-/// can be hashed as a sequence of bytes.
-template <typename InputIteratorT>
-stable_hash stable_hash_combine_range(InputIteratorT First,
- InputIteratorT Last) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- for (auto I = First; I != Last; ++I)
- hashing::detail::stable_hash_append(Hash, *I);
- return Hash;
+ stable_hash Hashes[4] = {A, B, C, D};
+ return stable_hash_combine(Hashes);
}
-inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- for (size_t I = 0; I < C; ++I)
- hashing::detail::stable_hash_append(Hash, P[I]);
- return Hash;
-}
} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index ace05902d5df79..a0726ca64910ea 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -424,8 +424,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
const uint32_t *RegMask = MO.getRegMask();
std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
return hash_combine(MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(RegMaskHashes.data(),
- RegMaskHashes.size()));
+ stable_hash_combine(RegMaskHashes));
}
assert(0 && "MachineOperand not associated with any MachineFunction");
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index fb5e9a37d9b997..916acbf2d2cbf9 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -66,7 +66,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
SmallVector<stable_hash> DefOpcodes;
for (auto &Def : MRI.def_instructions(MO.getReg()))
DefOpcodes.push_back(Def.getOpcode());
- return stable_hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
+ return stable_hash_combine(DefOpcodes);
}
// Register operands don't have target flags.
@@ -78,8 +78,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_FPImmediate: {
auto Val = MO.isCImm() ? MO.getCImm()->getValue()
: MO.getFPImm()->getValueAPF().bitcastToAPInt();
- auto ValHash =
- stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+ auto ValHash = stable_hash_combine(
+ ArrayRef<stable_hash>(Val.getRawData(), Val.getNumWords()));
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
}
@@ -126,10 +126,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
const uint32_t *RegMask = MO.getRegMask();
std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
RegMask + RegMaskSize);
- return stable_hash_combine(
- MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(RegMaskHashes.data(),
- RegMaskHashes.size()));
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine(RegMaskHashes));
}
}
}
@@ -145,10 +143,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
[](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
- return stable_hash_combine(
- MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(ShuffleMaskHashes.data(),
- ShuffleMaskHashes.size()));
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine(ShuffleMaskHashes));
}
case MachineOperand::MO_MCSymbol: {
auto SymbolName = MO.getMCSymbol()->getName();
@@ -212,8 +208,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
}
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
@@ -221,8 +216,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
// TODO: Hash more stuff like block alignment and branch probabilities.
for (const auto &MI : MBB)
HashComponents.push_back(stableHashValue(MI));
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
stable_hash llvm::stableHashValue(const MachineFunction &MF) {
@@ -230,6 +224,5 @@ stable_hash llvm::stableHashValue(const MachineFunction &MF) {
// TODO: Hash lots more stuff like function alignment and stack objects.
for (const auto &MBB : MF)
HashComponents.push_back(stableHashValue(MBB));
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
>From c9eb32c6f8462cb2d58d1580fda7be66d90b415f Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Wed, 24 Apr 2024 09:40:34 -0700
Subject: [PATCH 2/3] [MachineOutliner][NFC] Refactor
---
llvm/include/llvm/CodeGen/MachineOutliner.h | 5 +-
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 12 ++++-
llvm/lib/CodeGen/MachineOutliner.cpp | 55 +++++++++++---------
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +--
llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 +-
5 files changed, 48 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index eaba6c9b18f2bb..84937a8b563ac0 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -234,11 +234,11 @@ struct OutlinedFunction {
unsigned FrameConstructionID = 0;
/// Return the number of candidates for this \p OutlinedFunction.
- unsigned getOccurrenceCount() const { return Candidates.size(); }
+ virtual unsigned getOccurrenceCount() const { return Candidates.size(); }
/// Return the number of bytes it would take to outline this
/// function.
- unsigned getOutliningCost() const {
+ virtual unsigned getOutliningCost() const {
unsigned CallOverhead = 0;
for (const Candidate &C : Candidates)
CallOverhead += C.getCallOverhead();
@@ -272,6 +272,7 @@ struct OutlinedFunction {
}
OutlinedFunction() = delete;
+ virtual ~OutlinedFunction() = default;
};
} // namespace outliner
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 882cadea223695..a833a541e4e025 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2088,14 +2088,22 @@ class TargetInstrInfo : public MCInstrInfo {
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
/// information for a set of outlining candidates. Returns std::nullopt if the
- /// candidates are not suitable for outlining.
+ /// candidates are not suitable for outlining. \p MinRep is the minimum
+ /// number of times the instruction sequence must be repeated.
virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRep) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
+ virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
+ const MachineModuleInfo &MMI,
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ return getOutliningCandidateInfo(MMI, RepeatedSequenceLocs, /*MinRep=*/2);
+ }
+
/// Optional target hook to create the LLVM IR attributes for the outlined
/// function. If overridden, the overriding function must call the default
/// implementation.
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 4b56a467b8d076..eecf27613a2c31 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -456,8 +456,9 @@ struct MachineOutliner : public ModulePass {
/// \param Mapper Contains outlining mapping information.
/// \param[out] FunctionList Filled with a list of \p OutlinedFunctions
/// each type of candidate.
- void findCandidates(InstructionMapper &Mapper,
- std::vector<OutlinedFunction> &FunctionList);
+ void
+ findCandidates(InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
/// Replace the sequences of instructions represented by \p OutlinedFunctions
/// with calls to functions.
@@ -465,7 +466,9 @@ struct MachineOutliner : public ModulePass {
/// \param M The module we are outlining from.
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
- bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
+ /// \param[out] OutlinedFunctionNum The outlined function number.
+ bool outline(Module &M,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
/// Creates a function for \p OF and inserts it into the module.
@@ -583,7 +586,8 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
}
void MachineOutliner::findCandidates(
- InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
@@ -684,7 +688,7 @@ void MachineOutliner::findCandidates(
continue;
}
- FunctionList.push_back(*OF);
+ FunctionList.push_back(std::make_unique<OutlinedFunction>(*OF));
}
}
@@ -827,10 +831,9 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
return &MF;
}
-bool MachineOutliner::outline(Module &M,
- std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper,
- unsigned &OutlinedFunctionNum) {
+bool MachineOutliner::outline(
+ Module &M, std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList,
+ InstructionMapper &Mapper, unsigned &OutlinedFunctionNum) {
LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
<< "\n");
@@ -838,23 +841,23 @@ bool MachineOutliner::outline(Module &M,
// Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
// The function with highest priority should be outlined first.
- stable_sort(FunctionList,
- [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
- return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
- RHS.getNotOutlinedCost() * LHS.getOutliningCost();
- });
+ stable_sort(FunctionList, [](const std::unique_ptr<OutlinedFunction> &LHS,
+ const std::unique_ptr<OutlinedFunction> &RHS) {
+ return LHS->getNotOutlinedCost() * RHS->getOutliningCost() >
+ RHS->getNotOutlinedCost() * LHS->getOutliningCost();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
- for (OutlinedFunction &OF : FunctionList) {
+ for (auto &OF : FunctionList) {
#ifndef NDEBUG
- auto NumCandidatesBefore = OF.Candidates.size();
+ auto NumCandidatesBefore = OF->Candidates.size();
#endif
// If we outlined something that overlapped with a candidate in a previous
// step, then we can't outline from it.
- erase_if(OF.Candidates, [&UnsignedVecBegin](Candidate &C) {
+ erase_if(OF->Candidates, [&UnsignedVecBegin](Candidate &C) {
return std::any_of(UnsignedVecBegin + C.getStartIdx(),
UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
return I == static_cast<unsigned>(-1);
@@ -862,36 +865,36 @@ bool MachineOutliner::outline(Module &M,
});
#ifndef NDEBUG
- auto NumCandidatesAfter = OF.Candidates.size();
+ auto NumCandidatesAfter = OF->Candidates.size();
LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
<< "/" << NumCandidatesBefore << " candidates\n");
#endif
// If we made it unbeneficial to outline this function, skip it.
- if (OF.getBenefit() < OutlinerBenefitThreshold) {
- LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF.getBenefit()
+ if (OF->getBenefit() < OutlinerBenefitThreshold) {
+ LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF->getBenefit()
<< " B) < threshold (" << OutlinerBenefitThreshold
<< " B)\n");
continue;
}
- LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF.getBenefit()
+ LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF->getBenefit()
<< " B) > threshold (" << OutlinerBenefitThreshold
<< " B)\n");
// It's beneficial. Create the function and outline its sequence's
// occurrences.
- OF.MF = createOutlinedFunction(M, OF, Mapper, OutlinedFunctionNum);
- emitOutlinedFunctionRemark(OF);
+ OF->MF = createOutlinedFunction(M, *OF, Mapper, OutlinedFunctionNum);
+ emitOutlinedFunctionRemark(*OF);
FunctionsCreated++;
OutlinedFunctionNum++; // Created a function, move to the next name.
- MachineFunction *MF = OF.MF;
+ MachineFunction *MF = OF->MF;
const TargetSubtargetInfo &STI = MF->getSubtarget();
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Replace occurrences of the sequence with calls to the new function.
LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
- for (Candidate &C : OF.Candidates) {
+ for (Candidate &C : OF->Candidates) {
MachineBasicBlock &MBB = *C.getMBB();
MachineBasicBlock::iterator StartIt = C.begin();
MachineBasicBlock::iterator EndIt = std::prev(C.end());
@@ -1180,7 +1183,7 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
// Prepare instruction mappings for the suffix tree.
populateMapper(Mapper, M);
- std::vector<OutlinedFunction> FunctionList;
+ std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
// Find all of the outlining candidates.
findCandidates(Mapper, FunctionList);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 697ae510a95655..156ab6568f833e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -8687,7 +8687,8 @@ static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
std::optional<outliner::OutlinedFunction>
AArch64InstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRep) const {
unsigned SequenceSize = 0;
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);
@@ -8801,7 +8802,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < 2)
+ if (RepeatedSequenceLocs.size() < MinRep)
return std::nullopt;
}
@@ -9048,7 +9049,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
}
// If we dropped all of the candidates, bail out here.
- if (RepeatedSequenceLocs.size() < 2) {
+ if (RepeatedSequenceLocs.size() < MinRep) {
RepeatedSequenceLocs.clear();
return std::nullopt;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a1f2fbff016312..762fb9873065e6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -473,7 +473,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool OutlineFromLinkOnceODRs) const override;
std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
- std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ std::vector<outliner::Candidate> &RepeatedSequenceLocs,
+ unsigned MinRep) const override;
void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI,
>From 77698217d0d272a5ddd15ffb5b65871b4af741f7 Mon Sep 17 00:00:00 2001
From: Kyungwoo Lee <kyulee at meta.com>
Date: Fri, 23 Aug 2024 09:33:47 -0700
Subject: [PATCH 3/3] globaloutline2 on xxh3
---
llvm/include/llvm/CodeGen/MachineOutliner.h | 36 +++
llvm/lib/CGData/CodeGenData.cpp | 26 +-
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/MachineOutliner.cpp | 260 +++++++++++++++++-
llvm/lib/CodeGen/MachineStableHash.cpp | 20 +-
llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 +
.../CodeGen/AArch64/cgdata-global-hash.ll | 40 +++
.../CodeGen/AArch64/cgdata-outlined-name.ll | 41 +++
.../AArch64/cgdata-read-double-outline.ll | 57 ++++
.../AArch64/cgdata-read-lto-outline.ll | 96 +++++++
.../CodeGen/AArch64/cgdata-read-priority.ll | 68 +++++
.../AArch64/cgdata-read-single-outline.ll | 42 +++
.../CodeGen/AArch64/cgdata-write-outline.ll | 51 ++++
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 1 +
14 files changed, 734 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
create mode 100644 llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index 84937a8b563ac0..3bda86f399b7b5 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineStableHash.h"
#include <initializer_list>
namespace llvm {
@@ -274,6 +275,41 @@ struct OutlinedFunction {
OutlinedFunction() = delete;
virtual ~OutlinedFunction() = default;
};
+
+/// The information necessary to create an outlined function that is matched
+/// globally.
+struct GlobalOutlinedFunction : public OutlinedFunction {
+ GlobalOutlinedFunction(OutlinedFunction &OF, unsigned GlobalOccurrenceCount)
+ : OutlinedFunction(OF.Candidates, OF.SequenceSize, OF.FrameOverhead,
+ OF.FrameConstructionID),
+ GlobalOccurrenceCount(GlobalOccurrenceCount) {}
+
+ unsigned GlobalOccurrenceCount;
+
+ /// Return the number of times this sequence appears globally.
+ /// A global outlining candidate is created uniquely for each match, but it
+ /// may be erased when it overlaps with a previous outlining instance, in
+ /// which case the count is 0.
+ unsigned getOccurrenceCount() const override {
+ assert(Candidates.size() <= 1);
+ return Candidates.empty() ? 0 : GlobalOccurrenceCount;
+ }
+
+ /// Return the outlining cost using the global occurrence count
+ /// with the same cost as the first (unique) candidate.
+ unsigned getOutliningCost() const override {
+ assert(Candidates.size() <= 1);
+ unsigned CallOverhead =
+ Candidates.empty()
+ ? 0
+ : Candidates[0].getCallOverhead() * getOccurrenceCount();
+ return CallOverhead + SequenceSize + FrameOverhead;
+ }
+
+ GlobalOutlinedFunction() = delete;
+ ~GlobalOutlinedFunction() = default;
+};
+
} // namespace outliner
} // namespace llvm
diff --git a/llvm/lib/CGData/CodeGenData.cpp b/llvm/lib/CGData/CodeGenData.cpp
index 9dd4b1674e094a..55d2504231c744 100644
--- a/llvm/lib/CGData/CodeGenData.cpp
+++ b/llvm/lib/CGData/CodeGenData.cpp
@@ -24,6 +24,13 @@
using namespace llvm;
using namespace cgdata;
+cl::opt<bool>
+ CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
+ cl::desc("Emit CodeGen Data into custom sections"));
+cl::opt<std::string>
+ CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
+ cl::desc("File path to where .cgdata file is read"));
+
static std::string getCGDataErrString(cgdata_error Err,
const std::string &ErrMsg = "") {
std::string Msg;
@@ -132,7 +139,24 @@ CodeGenData &CodeGenData::getInstance() {
std::call_once(CodeGenData::OnceFlag, []() {
Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
- // TODO: Initialize writer or reader mode for the client optimization.
+ if (CodeGenDataGenerate)
+ Instance->EmitCGData = true;
+ else if (!CodeGenDataUsePath.empty()) {
+ // Initialize the global CGData if the input file name is given.
+ // We do not error-out when failing to parse the input file.
+ // Instead, just emit a warning message and fall back as if no CGData
+ // were available.
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
+ if (Error E = ReaderOrErr.takeError()) {
+ warn(std::move(E), CodeGenDataUsePath);
+ return;
+ }
+ // Publish each CGData based on the data type in the header.
+ auto Reader = ReaderOrErr->get();
+ if (Reader->hasOutlinedHashTree())
+ Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
+ }
});
return *(Instance.get());
}
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index f1607f85c5b319..3e75737185c3ee 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -267,6 +267,7 @@ add_llvm_component_library(LLVMCodeGen
Analysis
BitReader
BitWriter
+ CGData
CodeGenTypes
Core
MC
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index eecf27613a2c31..b8b108dcc5be4b 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,7 +59,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -75,6 +77,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <functional>
#include <tuple>
#include <vector>
@@ -98,6 +101,10 @@ STATISTIC(NumInvisible,
"Invisible instructions skipped during mapping");
STATISTIC(UnsignedVecSize,
"Total number of instructions mapped and saved to mapping vector");
+STATISTIC(StableHashAttempts,
+ "Count of hashing attempts made for outlined functions");
+STATISTIC(StableHashDropped,
+ "Count of unsuccessful hashing attempts for outlined functions");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -128,6 +135,19 @@ static cl::opt<bool> OutlinerLeafDescendants(
"tree as candidates for outlining (if false, only leaf children "
"are considered)"));
+static cl::opt<bool>
+ DisableGlobalOutlining("disable-global-outlining", cl::Hidden,
+ cl::desc("Disable global outlining only by ignoring "
+ "the codegen data generation or use"),
+ cl::init(false));
+
+static cl::opt<bool> AppendContentHashToOutlinedName(
+ "append-content-hash-outlined-name", cl::Hidden,
+ cl::desc("This appends the content hash to the globally outlined function "
+ "name. It's beneficial for enhancing the precision of the stable "
+ "hash and for ordering the outlined functions."),
+ cl::init(true));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -421,11 +441,29 @@ struct MachineOutliner : public ModulePass {
/// Set when the pass is constructed in TargetPassConfig.
bool RunOnAllFunctions = true;
+ /// This is a compact representation of hash sequences of outlined functions.
+ /// It is used when OutlinerMode = CGDataMode::Write.
+ /// The resulting hash tree will be emitted into the __llvm_outline section,
+ /// which will be dead-stripped and will not go into the final binary.
+ /// A post-process using llvm-cgdata, lld, or ThinLTO can merge them into
+ /// a global outlined hash tree for the subsequent codegen.
+ std::unique_ptr<OutlinedHashTree> LocalHashTree;
+
+ /// The mode of the outliner.
+ /// When it's CGDataMode::None, candidates are populated with the suffix tree
+ /// within a module and outlined.
+ /// When it's CGDataMode::Write, in addition to CGDataMode::None, the hash
+ /// sequences of outlined functions are published into LocalHashTree.
+ /// When it's CGDataMode::Read, candidates are populated with the global
+ /// outlined hash tree that has been built by the previous codegen.
+ CGDataMode OutlinerMode = CGDataMode::None;
+
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
+ AU.addRequired<ImmutableModuleSummaryIndexWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -460,6 +498,16 @@ struct MachineOutliner : public ModulePass {
findCandidates(InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+ /// Find all repeated substrings that match in the global outlined hash
+ /// tree built from the previous codegen.
+ ///
+ /// \param Mapper Contains outlining mapping information.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions for
+ /// each type of candidate.
+ void findGlobalCandidates(
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList);
+
/// Replace the sequences of instructions represented by \p OutlinedFunctions
/// with calls to functions.
///
@@ -476,6 +524,17 @@ struct MachineOutliner : public ModulePass {
InstructionMapper &Mapper,
unsigned Name);
+ /// Compute and publish the stable hash sequence of instructions in the
+ /// outlined function, \p MF. The parameter \p CandSize represents the number
+ /// of candidates that have identical instruction sequences to \p MF.
+ void computeAndPublishHashSequence(MachineFunction &MF, unsigned CandSize);
+
+ /// Initialize the outliner mode.
+ void initializeOutlinerMode(const Module &M);
+
+ /// Emit the outlined hash tree into __llvm_outline section.
+ void emitOutlinedHashTree(Module &M);
+
/// Calls 'doOutline()' 1 + OutlinerReruns times.
bool runOnModule(Module &M) override;
@@ -585,6 +644,109 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
+struct MatchedEntry {
+ size_t StartIdx; // Index of the first matched entry in the mapper's vectors.
+ size_t Length; // Number of mapper entries spanned, from StartIdx onward.
+ size_t Count; // Terminal count stored on the matching hash tree node.
+};
+
+static const HashNode *followHashNode(stable_hash StableHash,
+ const HashNode *Current) {
+ auto I = Current->Successors.find(StableHash); // Successor keyed by this hash.
+ return (I == Current->Successors.end()) ? nullptr : I->second.get(); // nullptr: no match.
+}
+
+// Find all matches in the global outlined hash tree.
+// The complexity is quadratic in theory, but nearly linear in practice
+// since the lengths of outlined sequences are small within a block.
+static std::vector<MatchedEntry> getMatchedEntries(InstructionMapper &Mapper) {
+ auto &InstrList = Mapper.InstrList;
+ auto &UnsignedVec = Mapper.UnsignedVec;
+
+ std::vector<MatchedEntry> MatchedEntries;
+ std::vector<stable_hash> Sequence;
+ auto Size = UnsignedVec.size();
+
+ // Get the global outlined hash tree built from the previous run.
+ assert(cgdata::hasOutlinedHashTree());
+ const auto *RootNode = cgdata::getOutlinedHashTree()->getRoot();
+ for (size_t I = 0; I < Size; ++I) {
+ // skip the invalid mapping that represents a large negative value.
+ if (UnsignedVec[I] >= Size)
+ continue;
+ const MachineInstr &MI = *InstrList[I];
+ // skip debug instructions as we did for the outlined function.
+ if (MI.isDebugInstr())
+ continue;
+ // skip the empty hash value.
+ stable_hash StableHashI = stableHashValue(MI);
+ if (!StableHashI)
+ continue;
+ Sequence.clear();
+ Sequence.push_back(StableHashI);
+
+ const HashNode *LastNode = followHashNode(StableHashI, RootNode);
+ if (!LastNode)
+ continue;
+
+ size_t J = I + 1;
+ for (; J < Size; ++J) {
+ // break on the invalid mapping that represents a large negative value.
+ if (UnsignedVec[J] >= Size)
+ break;
+ // ignore debug instructions as we did for the outlined function.
+ const MachineInstr &MJ = *InstrList[J];
+ if (MJ.isDebugInstr())
+ continue;
+ // break on the empty hash value.
+ stable_hash StableHashJ = stableHashValue(MJ);
+ if (!StableHashJ)
+ break;
+ LastNode = followHashNode(StableHashJ, LastNode);
+ if (!LastNode)
+ break;
+
+ // Even with a match ending with a terminal, we continue finding
+ // matches to populate all candidates.
+ Sequence.push_back(StableHashJ);
+ auto Count = LastNode->Terminals;
+ if (Count)
+ MatchedEntries.push_back({I, J - I + 1, *Count});
+ }
+ }
+
+ return MatchedEntries;
+}
+
+void MachineOutliner::findGlobalCandidates(
+ InstructionMapper &Mapper,
+ std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
+ FunctionList.clear();
+ auto &InstrList = Mapper.InstrList;
+ auto &MBBFlagsMap = Mapper.MBBFlagsMap;
+
+ std::vector<Candidate> CandidatesForRepeatedSeq;
+ for (auto &ME : getMatchedEntries(Mapper)) {
+ CandidatesForRepeatedSeq.clear();
+ MachineBasicBlock::iterator StartIt = InstrList[ME.StartIdx];
+ MachineBasicBlock::iterator EndIt = InstrList[ME.StartIdx + ME.Length - 1];
+ MachineBasicBlock *MBB = StartIt->getParent();
+ Candidate C(ME.StartIdx, ME.Length, StartIt, EndIt, MBB,
+ FunctionList.size(), MBBFlagsMap[MBB]);
+ CandidatesForRepeatedSeq.push_back(C);
+ const TargetInstrInfo *TII = C.getMF()->getSubtarget().getInstrInfo();
+ std::optional<OutlinedFunction> OF = TII->getOutliningCandidateInfo(
+ *MMI, CandidatesForRepeatedSeq, /*MinRep=*/1);
+ if (!OF || OF->Candidates.empty())
+ continue;
+ // We create a global candidate for each match.
+ assert(OF->Candidates.size() == 1);
+
+ FunctionList.push_back(
+ std::make_unique<GlobalOutlinedFunction>(*OF, ME.Count));
+ }
+}
+
void MachineOutliner::findCandidates(
InstructionMapper &Mapper,
std::vector<std::unique_ptr<OutlinedFunction>> &FunctionList) {
@@ -692,6 +854,40 @@ void MachineOutliner::findCandidates(
}
}
+/// Compute the stable-hash sequence of the outlined function \p MF. When
+/// AppendContentHashToOutlinedName is set, rename the function with a
+/// ".content.<hash>" suffix derived from that sequence. In CGDataMode::Write,
+/// insert the sequence together with \p CandSize into the local hash tree.
+/// A sequence containing any instruction without a stable hash is treated as
+/// empty: it is neither used for naming nor published.
+void MachineOutliner::computeAndPublishHashSequence(MachineFunction &MF,
+                                                    unsigned CandSize) {
+  // Compute the hash sequence for the outlined function.
+  SmallVector<stable_hash> OutlinedHashSequence;
+  bool HasUnhashableInstr = false;
+  for (auto &MBB : MF) {
+    for (auto &NewMI : MBB) {
+      stable_hash Hash = stableHashValue(NewMI);
+      if (!Hash) {
+        HasUnhashableInstr = true;
+        break;
+      }
+      OutlinedHashSequence.push_back(Hash);
+    }
+    // Stop scanning entirely; otherwise hashes from the following blocks
+    // would be appended and a partial sequence would be published.
+    if (HasUnhashableInstr)
+      break;
+  }
+  if (HasUnhashableInstr)
+    OutlinedHashSequence.clear();
+
+  // Append a unique name based on the non-empty hash sequence.
+  if (AppendContentHashToOutlinedName && !OutlinedHashSequence.empty()) {
+    auto CombinedHash = stable_hash_combine_range(OutlinedHashSequence.begin(),
+                                                  OutlinedHashSequence.end());
+    auto NewName =
+        MF.getName().str() + ".content." + std::to_string(CombinedHash);
+    MF.getFunction().setName(NewName);
+  }
+
+  // Publish the non-empty hash sequence to the local hash tree. Every call in
+  // write mode counts as an attempt; empty sequences count as dropped.
+  if (OutlinerMode == CGDataMode::Write) {
+    StableHashAttempts++;
+    if (!OutlinedHashSequence.empty())
+      LocalHashTree->insert({OutlinedHashSequence, CandSize});
+    else
+      StableHashDropped++;
+  }
+}
+
MachineFunction *MachineOutliner::createOutlinedFunction(
Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
@@ -767,6 +963,9 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
}
}
+ if (OutlinerMode != CGDataMode::None)
+ computeAndPublishHashSequence(MF, OF.Candidates.size());
+
// Set normal properties for a late MachineFunction.
MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
@@ -1131,12 +1330,65 @@ void MachineOutliner::emitInstrCountChangedRemark(
}
}
+/// Decide whether this run writes codegen data, reads it, or ignores it.
+/// OutlinerMode is left untouched when global outlining is disabled, when the
+/// summary index reports no exported functions for \p M, or when no codegen
+/// data is requested or available.
+void MachineOutliner::initializeOutlinerMode(const Module &M) {
+  if (DisableGlobalOutlining)
+    return;
+
+  auto *IndexWrapperPass =
+      getAnalysisIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+  if (IndexWrapperPass) {
+    // (Full)LTO module does not have functions added to the index.
+    // In this case, we run the outliner without using codegen data as usual.
+    auto *TheIndex = IndexWrapperPass->getIndex();
+    if (TheIndex && !TheIndex->hasExportedFunctions(M))
+      return;
+  }
+
+  // Write mode: the local outlined hash tree is collected here and later
+  // emitted into the custom section, `__llvm_outline`. Writing takes
+  // precedence, so a hash tree from previous codegen data is not read.
+  if (cgdata::emitCGData()) {
+    // Create a local outlined hash tree to be published.
+    LocalHashTree.reset(new OutlinedHashTree());
+    OutlinerMode = CGDataMode::Write;
+    return;
+  }
+
+  // Read mode: an outlined hash tree from previous codegen data is available,
+  // so use it to optimistically create global outlining candidates.
+  if (cgdata::hasOutlinedHashTree())
+    OutlinerMode = CGDataMode::Read;
+}
+
+/// Serialize the local outlined hash tree and embed it into \p M under the
+/// codegen-data section name for the module's object format. Nothing is
+/// emitted when the tree is empty. Ownership of LocalHashTree is moved into
+/// the record that performs the serialization.
+void MachineOutliner::emitOutlinedHashTree(Module &M) {
+  assert(LocalHashTree);
+  if (LocalHashTree->empty())
+    return;
+
+  LLVM_DEBUG({
+    dbgs() << "Emit outlined hash tree. Size: " << LocalHashTree->size()
+           << "\n";
+  });
+
+  // Serialize the tree into an in-memory byte buffer.
+  SmallVector<char> Bytes;
+  raw_svector_ostream Stream(Bytes);
+  OutlinedHashTreeRecord Record(std::move(LocalHashTree));
+  Record.serialize(Stream);
+
+  // Wrap the bytes without copying; the data is not null-terminated.
+  std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
+      llvm::StringRef(Bytes.data(), Bytes.size()),
+      "in-memory outlined hash tree", /*RequiresNullTerminator=*/false);
+
+  // The section name depends on the object format of the target triple.
+  Triple TT(M.getTargetTriple());
+  embedBufferInModule(
+      M, *Buffer,
+      getCodeGenDataSectionName(CG_outline, TT.getObjectFormat()));
+}
+
bool MachineOutliner::runOnModule(Module &M) {
// Check if there's anything in the module. If it's empty, then there's
// nothing to outline.
if (M.empty())
return false;
+ // Initialize the outliner mode.
+ initializeOutlinerMode(M);
+
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// Number to append to the current outlined function.
@@ -1158,6 +1410,9 @@ bool MachineOutliner::runOnModule(Module &M) {
}
}
+ if (OutlinerMode == CGDataMode::Write)
+ emitOutlinedHashTree(M);
+
return true;
}
@@ -1186,7 +1441,10 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
std::vector<std::unique_ptr<OutlinedFunction>> FunctionList;
// Find all of the outlining candidates.
- findCandidates(Mapper, FunctionList);
+ if (OutlinerMode == CGDataMode::Read)
+ findGlobalCandidates(Mapper, FunctionList);
+ else
+ findCandidates(Mapper, FunctionList);
// If we've requested size remarks, then collect the MI counts of every
// function before outlining, and the MI counts after outlining.
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index 916acbf2d2cbf9..844bc9e36442f4 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -95,9 +95,22 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_Metadata:
StableHashBailingMetadataUnsupported++;
return 0;
- case MachineOperand::MO_GlobalAddress:
- StableHashBailingGlobalAddress++;
- return 0;
+  case MachineOperand::MO_GlobalAddress: {
+    // Hash a global by name so the value is stable across modules. Globals
+    // with private linkage or without a name are not hashed; bail out with 0.
+    const GlobalValue *GV = MO.getGlobal();
+    if (GV->hasPrivateLinkage() || !GV->hasName()) {
+      StableHashBailingGlobalAddress++;
+      return 0;
+    }
+    auto Name = GV->getName();
+    // Use the content hash of the outlined function: hash only the
+    // ".content.<hash>" suffix when present. This must be a substring search
+    // (rfind); find_last_of would match any single character of the pattern
+    // and truncate ordinary names.
+    auto Pos = Name.rfind(".content.");
+    if (Pos != StringRef::npos) {
+      assert(Name.starts_with("OUTLINED_FUNCTION"));
+      Name = Name.substr(Pos);
+    }
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               xxh3_64bits(Name), MO.getOffset());
+  }
case MachineOperand::MO_TargetIndex: {
if (const char *Name = MO.getTargetIndexName())
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
@@ -142,7 +155,6 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
llvm::transform(
MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
[](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
-
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
stable_hash_combine(ShuffleMaskHashes));
}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index 3465b717261cf5..66ce960462c63d 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -16,6 +16,7 @@
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
+; CHECK-NEXT: Module summary info
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll b/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
new file mode 100644
index 00000000000000..c425eda56f5d5b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-global-hash.ll
@@ -0,0 +1,40 @@
+; This test verifies the stable hash values for different global variables
+; that have distinct names.
+; We generate two different cgdata files from nearly identical outline instances,
+; with the only difference being the last call target globals, @g vs @h.
+
+; RUN: split-file %s %t
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-g.ll -o %t/local-g.o
+; RUN: llvm-cgdata --merge %t/local-g.o -o %t/local-g.cgdata
+; RUN: llvm-cgdata --convert %t/local-g.cgdata -o %t/local-g.cgtext
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-h.ll -o %t/local-h.o
+; RUN: llvm-cgdata --merge %t/local-h.o -o %t/local-h.cgdata
+; RUN: llvm-cgdata --convert %t/local-h.cgdata -o %t/local-h.cgtext
+
+; We compare the trees which are only different at the terminal node's hash value.
+; Here we simply count the different lines that have `Hash` string.
+; RUN: not diff %t/local-g.cgtext %t/local-h.cgtext 2>&1 | grep Hash | wc -l | FileCheck %s
+; CHECK: 2
+
+;--- local-g.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-h.ll
+declare i32 @h(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @h(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @h(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll b/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
new file mode 100644
index 00000000000000..69f1ecd6515e7e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-outlined-name.ll
@@ -0,0 +1,41 @@
+; This test verifies the globally outlined function name has the content hash.
+
+; RUN: split-file %s %t
+
+; Check if the outlined function name has the content hash depending on the flag.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -append-content-hash-outlined-name=false -filetype=obj %t/local-two.ll -o %t_write_base
+; RUN: llvm-objdump -d %t_write_base | FileCheck %s --check-prefix=BASE
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -append-content-hash-outlined-name=true -filetype=obj %t/local-two.ll -o %t_write_suffix
+; RUN: llvm-objdump -d %t_write_suffix | FileCheck %s --check-prefix=SUFFIX
+; BASE-NOT: _OUTLINED_FUNCTION_{{.*}}.content.{{[0-9]+}}
+; SUFFIX: _OUTLINED_FUNCTION_{{.*}}.content.{{[0-9]+}}
+
+; Generate the cgdata file from each case and show they are identical.
+; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
+; RUN: llvm-cgdata --merge %t_write_suffix -o %t_cgdata_suffix
+; RUN: diff %t_cgdata_base %t_cgdata_suffix
+
+; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
+; Check if the outlined function has the content hash depending on the flag.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -append-content-hash-outlined-name=false -filetype=obj %t/local-one.ll -o %t_read_base
+; RUN: llvm-objdump -d %t_read_base | FileCheck %s --check-prefix=BASE
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_suffix -append-content-hash-outlined-name=true -filetype=obj %t/local-one.ll -o %t_read_suffix
+; RUN: llvm-objdump -d %t_read_suffix | FileCheck %s --check-prefix=SUFFIX
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-one.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
new file mode 100644
index 00000000000000..6e027308c17068
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-double-outline.ll
@@ -0,0 +1,57 @@
+; This test demonstrates how identical instruction sequences are handled during global outlining.
+; Currently, we do not attempt to share an outlined function for identical sequences.
+; Instead, each instruction sequence that matches against the global outlined hash tree
+; is outlined into its own unique function.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from a local outline instance present in local-two.ll.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-two.ll -o %t_write
+; RUN: llvm-cgdata --merge %t_write -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we read the cgdata for local-two-another.ll and proceed to optimistically outline
+; each instruction sequence that matches against the global outlined hash tree.
+; Since each matching sequence is considered a candidate, we expect to generate two
+; unique outlined functions. These functions, although unique, will be identical in code,
+; and thus, will be folded by the linker.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/local-two-another.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-two-another.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
new file mode 100644
index 00000000000000..f1a5d1a0ccc7f0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-lto-outline.ll
@@ -0,0 +1,96 @@
+; This test is similar to cgdata-read-double-outline.ll, but it is executed with LTO (Link Time Optimization).
+; It demonstrates how identical instruction sequences are handled during global outlining.
+; Currently, we do not attempt to reuse an outlined function for identical sequences.
+; Instead, each instruction sequence that appears in the global outlined hash tree
+; is outlined into its own unique function.
+
+; RUN: split-file %s %t
+
+; We first create the cgdata file from a local outline instance in local-two.ll
+; RUN: opt -module-summary %t/local-two.ll -o %t/write.bc
+; RUN: llvm-lto2 run %t/write.bc -o %t/write \
+; RUN: -r %t/write.bc,_f1,px -r %t/write.bc,_f2,px -r %t/write.bc,_g,p \
+; RUN: -codegen-data-generate=true
+; RUN: llvm-cgdata --merge %t/write.1 -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we execute either ThinLTO or LTO by reading the cgdata for local-two-another.ll.
+; With ThinLTO, similar to the no-LTO scenario shown in cgdata-read-double-outline.ll,
+; it optimistically outlines each instruction sequence that matches against
+; the global outlined hash tree. Since each matching sequence is considered a candidate,
+; we expect to generate two unique outlined functions that will be folded
+; by the linker at a later stage.
+; However, with LTO, we do not utilize the cgdata, but instead fall back to the default
+; outliner mode. This results in a single outlined function that is
+; shared across two call-sites.
+
+; Run ThinLTO
+; RUN: opt -module-summary %t/local-two-another.ll -o %t/thinlto.bc
+; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto \
+; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \
+; RUN: -codegen-data-use-path=%t_cgdata
+; RUN: llvm-objdump -d %t/thinlto.1 | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+; CHECK: _OUTLINED_FUNCTION_{{.*}}:
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+; Run ThinLTO while disabling the global outliner.
+; We have a single outlined case with the default outliner.
+; RUN: llvm-lto2 run %t/thinlto.bc -o %t/thinlto-disable \
+; RUN: -r %t/thinlto.bc,_f3,px -r %t/thinlto.bc,_f4,px -r %t/thinlto.bc,_g,p \
+; RUN: -enable-machine-outliner \
+; RUN: -codegen-data-use-path=%t_cgdata \
+; RUN: -disable-global-outlining
+; RUN: llvm-objdump -d %t/thinlto-disable.1 | FileCheck %s --check-prefix=DISABLE
+
+; DISABLE: _OUTLINED_FUNCTION_{{.*}}:
+; DISABLE-NEXT: mov
+; DISABLE-NEXT: mov
+; DISABLE-NEXT: b
+; DISABLE-NOT: _OUTLINED_FUNCTION_{{.*}}:
+
+; Run LTO, which effectively disables the global outliner.
+; RUN: opt %t/local-two-another.ll -o %t/lto.bc
+; RUN: llvm-lto2 run %t/lto.bc -o %t/lto \
+; RUN: -r %t/lto.bc,_f3,px -r %t/lto.bc,_f4,px -r %t/lto.bc,_g,p \
+; RUN: -enable-machine-outliner \
+; RUN: -codegen-data-use-path=%t_cgdata
+; RUN: llvm-objdump -d %t/lto.0 | FileCheck %s --check-prefix=DISABLE
+
+;--- local-two.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-two-another.ll
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-darwin"
+
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll b/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
new file mode 100644
index 00000000000000..affeea8c71acd3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-priority.ll
@@ -0,0 +1,68 @@
+; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
+; using codegen data that has been read from a previous codegen run.
+; When multiple matches occur, we prioritize the candidates using the global frequency.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from local outline instances present in write1.ll and write2.ll
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write1.ll -o %t_write1
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/write2.ll -o %t_write2
+; RUN: llvm-cgdata --merge %t_write1 %t_write2 -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 8
+; SHOW-NEXT: Terminal Node Count: 2
+; SHOW-NEXT: Depth: 4
+
+; Now, we read the cgdata in the machine outliner, enabling us to optimistically
+; outline a singleton instance in read.ll that matches against the cgdata.
+; There are two matches -- (1) (mov #1, mov #2, mov #3, b) and (2) (mov #2, mov #3, b).
+; Even though sequence (1) is longer than sequence (2), the latter is outlined because it occurs more frequently in the outlined hash tree.
+
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/read.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- write1.ll
+; The sequence (mov #2, mov #3, b) is repeated 4 times.
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 50, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 60, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 70, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f4() minsize {
+ %1 = call i32 @g(i32 40, i32 80, i32 2, i32 3);
+ ret i32 %1
+}
+
+;--- write2.ll
+; The sequence (mov #1, mov #2, mov #3, b) is repeated 2 times.
+declare i32 @g(i32, i32, i32)
+define i32 @f6() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
+define i32 @f7() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
+
+;--- read.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2, i32 3);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
new file mode 100644
index 00000000000000..7725648a6bc3d5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-read-single-outline.ll
@@ -0,0 +1,42 @@
+; This test verifies whether we can outline a singleton instance (i.e., an instance that does not repeat)
+; using codegen data that has been read from a previous codegen run.
+
+; RUN: split-file %s %t
+
+; First, we generate the cgdata file from a local outline instance present in local-two.ll.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %t/local-two.ll -o %t_write
+; RUN: llvm-cgdata --merge %t_write -o %t_cgdata
+; RUN: llvm-cgdata --show %t_cgdata | FileCheck %s --check-prefix=SHOW
+
+; SHOW: Outlined hash tree:
+; SHOW-NEXT: Total Node Count: 4
+; SHOW-NEXT: Terminal Node Count: 1
+; SHOW-NEXT: Depth: 3
+
+; Now, we read the cgdata in the machine outliner, enabling us to optimistically
+; outline a singleton instance in local-one.ll that matches against the cgdata.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata -filetype=obj %t/local-one.ll -o %t_read
+; RUN: llvm-objdump -d %t_read | FileCheck %s
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+
+;--- local-two.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
+
+;--- local-one.ll
+declare i32 @g(i32, i32, i32)
+define i32 @f3() minsize {
+ %1 = call i32 @g(i32 30, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll b/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
new file mode 100644
index 00000000000000..09ad499190ee37
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/cgdata-write-outline.ll
@@ -0,0 +1,51 @@
+; This test verifies whether an outlined function is encoded into the __llvm_outline section
+; when the -codegen-data-generate flag is used.
+
+; Verify whether an outlined function is always created, but only encoded into the section when the flag is used.
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=true -filetype=obj %s -o %t_save
+; RUN: llvm-objdump -d %t_save | FileCheck %s
+; RUN: llvm-objdump -h %t_save | FileCheck %s --check-prefix=SECTNAME
+; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate=false -filetype=obj %s -o %t_nosave
+; RUN: llvm-objdump -d %t_nosave | FileCheck %s
+; RUN: llvm-objdump -h %t_nosave | FileCheck %s --check-prefix=NOSECTNAME
+
+; CHECK: _OUTLINED_FUNCTION
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
+; CHECK-NEXT: b
+; SECTNAME: __llvm_outline
+; NOSECTNAME-NOT: __llvm_outline
+
+; Verify the content of cgdata after it has been processed with llvm-cgdata.
+; RUN: llvm-cgdata --merge %t_save -o %t_cgdata
+; RUN: llvm-cgdata --convert %t_cgdata | FileCheck %s --check-prefix=TREE
+
+; TREE: :outlined_hash_tree
+; TREE: ---
+; TREE-NEXT: 0:
+; TREE-NEXT: Hash: 0x0
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 1 ]
+; TREE-NEXT: 1:
+; TREE-NEXT: Hash: {{.}}
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 2 ]
+; TREE-NEXT: 2:
+; TREE-NEXT: Hash: {{.}}
+; TREE-NEXT: Terminals: 0
+; TREE-NEXT: SuccessorIds: [ 3 ]
+; TREE-NEXT: 3:
+; TREE-NEXT: Hash: {{.}}
+; TREE-NEXT: Terminals: 2
+; TREE-NEXT: SuccessorIds: [ ]
+; TREE-NEXT: ...
+
+declare i32 @g(i32, i32, i32)
+define i32 @f1() minsize {
+ %1 = call i32 @g(i32 10, i32 1, i32 2);
+ ret i32 %1
+}
+define i32 @f2() minsize {
+ %1 = call i32 @g(i32 20, i32 1, i32 2);
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 44c270fdc3c257..7749f0db0c54d3 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -20,6 +20,7 @@
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: Default Regalloc Eviction Advisor
; CHECK-NEXT: Default Regalloc Priority Advisor
+; CHECK-NEXT: Module summary info
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
; CHECK-NEXT: FunctionPass Manager
More information about the llvm-commits
mailing list