[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 00:11:42 PST 2025
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324
From 47e051c851ae7e91ab8fba49a551b56a265b2584 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 4 Nov 2025 07:29:48 +0000
Subject: [PATCH] X86: Add prefetch insertion based on Propeller profile
This commit introduces a new pass that inserts prefetch instructions on
X86 targets. The pass uses Propeller profiles to guide prefetch
placement, optimizing memory access patterns.

The new file llvm/lib/Target/X86/PrefetchInsertion.cpp implements this
functionality. The commit also makes the modifications to related
CodeGen and X86 target files that are needed to integrate the new pass.

A build issue where PrefetchInsertion.cpp was missing from
CMakeLists.txt was also resolved.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 47 +++-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 170 ++++++-------
llvm/include/llvm/CodeGen/MachineInstr.h | 90 +++----
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 176 +++++++++-----
llvm/lib/CodeGen/BasicBlockSections.cpp | 6 +-
.../BasicBlockSectionsProfileReader.cpp | 86 ++++++-
llvm/lib/CodeGen/CodeGenPrepare.cpp | 62 ++---
llvm/lib/CodeGen/MachineBasicBlock.cpp | 88 ++++---
llvm/lib/Target/X86/CMakeLists.txt | 223 +++++++++---------
llvm/lib/Target/X86/PrefetchInsertion.cpp | 209 ++++++++++++++++
llvm/lib/Target/X86/X86.h | 13 +-
llvm/lib/Target/X86/X86TargetMachine.cpp | 16 +-
13 files changed, 791 insertions(+), 396 deletions(-)
create mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 7b1a5f5019589..ce47b461496bd 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
+struct BBPosition {
+ UniqueBBID BBID;
+ unsigned BBOffset;
+};
+
+struct PrefetchHint {
+ BBPosition SitePosition;
+ StringRef TargetFunctionName;
+ BBPosition TargetPosition;
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
+ SmallVector<PrefetchHint> PrefetchHints;
+ DenseSet<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
- // Edge counts for each edge, stored as a nested map.
+ // Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
// Hash for each basic block. The Hashes are stored for every original block
// (not cloned blocks), hence the map key being unsigned instead of
@@ -60,11 +73,32 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
+// Provides DenseMapInfo for BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+ static inline BBPosition getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline BBPosition getTombstoneKey() {
+ return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const BBPosition &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
- : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){};
+ : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') {};
BasicBlockSectionsProfileReader() = default;
@@ -90,6 +124,11 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -198,6 +237,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 71739278cf513..deff97416df23 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -229,6 +237,8 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -254,9 +264,7 @@ class MachineBasicBlock
/// Remove the reference to the underlying IR BasicBlock. This is for
/// reduction tools and should generally not be used.
- void clearBasicBlock() {
- BB = nullptr;
- }
+ void clearBasicBlock() { BB = nullptr; }
/// Check if there is a name of corresponding LLVM basic block.
LLVM_ABI bool hasName() const;
@@ -348,24 +356,24 @@ class MachineBasicBlock
LLVM_ABI bool sizeWithoutDebugLargerThan(unsigned Limit) const;
bool empty() const { return Insts.empty(); }
- MachineInstr &instr_front() { return Insts.front(); }
- MachineInstr &instr_back() { return Insts.back(); }
+ MachineInstr &instr_front() { return Insts.front(); }
+ MachineInstr &instr_back() { return Insts.back(); }
const MachineInstr &instr_front() const { return Insts.front(); }
- const MachineInstr &instr_back() const { return Insts.back(); }
-
- MachineInstr &front() { return Insts.front(); }
- MachineInstr &back() { return *--end(); }
- const MachineInstr &front() const { return Insts.front(); }
- const MachineInstr &back() const { return *--end(); }
-
- instr_iterator instr_begin() { return Insts.begin(); }
- const_instr_iterator instr_begin() const { return Insts.begin(); }
- instr_iterator instr_end() { return Insts.end(); }
- const_instr_iterator instr_end() const { return Insts.end(); }
- reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
+ const MachineInstr &instr_back() const { return Insts.back(); }
+
+ MachineInstr &front() { return Insts.front(); }
+ MachineInstr &back() { return *--end(); }
+ const MachineInstr &front() const { return Insts.front(); }
+ const MachineInstr &back() const { return *--end(); }
+
+ instr_iterator instr_begin() { return Insts.begin(); }
+ const_instr_iterator instr_begin() const { return Insts.begin(); }
+ instr_iterator instr_end() { return Insts.end(); }
+ const_instr_iterator instr_end() const { return Insts.end(); }
+ reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
- reverse_instr_iterator instr_rend () { return Insts.rend(); }
- const_reverse_instr_iterator instr_rend () const { return Insts.rend(); }
+ reverse_instr_iterator instr_rend() { return Insts.rend(); }
+ const_reverse_instr_iterator instr_rend() const { return Insts.rend(); }
using instr_range = iterator_range<instr_iterator>;
using const_instr_range = iterator_range<const_instr_iterator>;
@@ -374,10 +382,10 @@ class MachineBasicBlock
return const_instr_range(instr_begin(), instr_end());
}
- iterator begin() { return instr_begin(); }
- const_iterator begin() const { return instr_begin(); }
- iterator end () { return instr_end(); }
- const_iterator end () const { return instr_end(); }
+ iterator begin() { return instr_begin(); }
+ const_iterator begin() const { return instr_begin(); }
+ iterator end() { return instr_end(); }
+ const_iterator end() const { return instr_end(); }
reverse_iterator rbegin() {
return reverse_iterator::getAtBundleBegin(instr_rbegin());
}
@@ -424,38 +432,30 @@ class MachineBasicBlock
SmallVectorImpl<MachineBasicBlock *>::reverse_iterator;
using const_succ_reverse_iterator =
SmallVectorImpl<MachineBasicBlock *>::const_reverse_iterator;
- pred_iterator pred_begin() { return Predecessors.begin(); }
- const_pred_iterator pred_begin() const { return Predecessors.begin(); }
- pred_iterator pred_end() { return Predecessors.end(); }
- const_pred_iterator pred_end() const { return Predecessors.end(); }
- pred_reverse_iterator pred_rbegin()
- { return Predecessors.rbegin();}
- const_pred_reverse_iterator pred_rbegin() const
- { return Predecessors.rbegin();}
- pred_reverse_iterator pred_rend()
- { return Predecessors.rend(); }
- const_pred_reverse_iterator pred_rend() const
- { return Predecessors.rend(); }
- unsigned pred_size() const {
- return (unsigned)Predecessors.size();
- }
- bool pred_empty() const { return Predecessors.empty(); }
- succ_iterator succ_begin() { return Successors.begin(); }
- const_succ_iterator succ_begin() const { return Successors.begin(); }
- succ_iterator succ_end() { return Successors.end(); }
- const_succ_iterator succ_end() const { return Successors.end(); }
- succ_reverse_iterator succ_rbegin()
- { return Successors.rbegin(); }
- const_succ_reverse_iterator succ_rbegin() const
- { return Successors.rbegin(); }
- succ_reverse_iterator succ_rend()
- { return Successors.rend(); }
- const_succ_reverse_iterator succ_rend() const
- { return Successors.rend(); }
- unsigned succ_size() const {
- return (unsigned)Successors.size();
- }
- bool succ_empty() const { return Successors.empty(); }
+ pred_iterator pred_begin() { return Predecessors.begin(); }
+ const_pred_iterator pred_begin() const { return Predecessors.begin(); }
+ pred_iterator pred_end() { return Predecessors.end(); }
+ const_pred_iterator pred_end() const { return Predecessors.end(); }
+ pred_reverse_iterator pred_rbegin() { return Predecessors.rbegin(); }
+ const_pred_reverse_iterator pred_rbegin() const {
+ return Predecessors.rbegin();
+ }
+ pred_reverse_iterator pred_rend() { return Predecessors.rend(); }
+ const_pred_reverse_iterator pred_rend() const { return Predecessors.rend(); }
+ unsigned pred_size() const { return (unsigned)Predecessors.size(); }
+ bool pred_empty() const { return Predecessors.empty(); }
+ succ_iterator succ_begin() { return Successors.begin(); }
+ const_succ_iterator succ_begin() const { return Successors.begin(); }
+ succ_iterator succ_end() { return Successors.end(); }
+ const_succ_iterator succ_end() const { return Successors.end(); }
+ succ_reverse_iterator succ_rbegin() { return Successors.rbegin(); }
+ const_succ_reverse_iterator succ_rbegin() const {
+ return Successors.rbegin();
+ }
+ succ_reverse_iterator succ_rend() { return Successors.rend(); }
+ const_succ_reverse_iterator succ_rend() const { return Successors.rend(); }
+ unsigned succ_size() const { return (unsigned)Successors.size(); }
+ bool succ_empty() const { return Successors.empty(); }
inline iterator_range<pred_iterator> predecessors() {
return make_range(pred_begin(), pred_end());
@@ -528,8 +528,8 @@ class MachineBasicBlock
}
LLVM_ABI livein_iterator livein_begin() const;
- livein_iterator livein_end() const { return LiveIns.end(); }
- bool livein_empty() const { return LiveIns.empty(); }
+ livein_iterator livein_end() const { return LiveIns.end(); }
+ bool livein_empty() const { return LiveIns.empty(); }
iterator_range<livein_iterator> liveins() const {
return make_range(livein_begin(), livein_end());
}
@@ -581,13 +581,9 @@ class MachineBasicBlock
return Tmp;
}
- reference operator*() const {
- return *LiveRegI;
- }
+ reference operator*() const { return *LiveRegI; }
- pointer operator->() const {
- return &*LiveRegI;
- }
+ pointer operator->() const { return &*LiveRegI; }
bool operator==(const liveout_iterator &RHS) const {
if (BlockI != BlockEnd)
@@ -598,6 +594,7 @@ class MachineBasicBlock
bool operator!=(const liveout_iterator &RHS) const {
return !(*this == RHS);
}
+
private:
bool advanceToValidPosition() {
if (LiveRegI != (*BlockI)->livein_end())
@@ -710,6 +707,14 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
+ const SmallVector<unsigned> &getPrefetchTargets() const {
+ return PrefetchTargets;
+ }
+
+ void setPrefetchTargets(const SmallVector<unsigned> &V) {
+ PrefetchTargets = V;
+ }
+
/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }
@@ -978,9 +983,7 @@ class MachineBasicBlock
/// Convenience function that returns true if the block ends in a return
/// instruction.
- bool isReturnBlock() const {
- return !empty() && back().isReturn();
- }
+ bool isReturnBlock() const { return !empty() && back().isReturn(); }
/// Convenience function that returns true if the block ends in an EH scope
/// return instruction.
@@ -1057,8 +1060,7 @@ class MachineBasicBlock
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M);
/// Insert a range of instructions into the instruction list before I.
- template<typename IT>
- void insert(iterator I, IT S, IT E) {
+ template <typename IT> void insert(iterator I, IT S, IT E) {
assert((I == end() || I->getParent() == this) &&
"iterator points outside of basic block");
Insts.insert(I.getInstrIterator(), S, E);
@@ -1116,17 +1118,13 @@ class MachineBasicBlock
/// Remove an instruction or bundle from the instruction list and delete it.
///
/// If I points to a bundle of instructions, they are all erased.
- iterator erase(iterator I) {
- return erase(I, std::next(I));
- }
+ iterator erase(iterator I) { return erase(I, std::next(I)); }
/// Remove an instruction from the instruction list and delete it.
///
/// If I is the head of a bundle of instructions, the whole bundle will be
/// erased.
- iterator erase(MachineInstr *I) {
- return erase(iterator(I));
- }
+ iterator erase(MachineInstr *I) { return erase(iterator(I)); }
/// Remove the unbundled instruction from the instruction list without
/// deleting it.
@@ -1145,9 +1143,7 @@ class MachineBasicBlock
/// bundle will still be bundled after removing the single instruction.
LLVM_ABI MachineInstr *remove_instr(MachineInstr *I);
- void clear() {
- Insts.clear();
- }
+ void clear() { Insts.clear(); }
/// Take an instruction from MBB 'Other' at the position From, and insert it
/// into this MBB right before 'Where'.
@@ -1164,8 +1160,8 @@ class MachineBasicBlock
///
/// The instruction at 'Where' must not be included in the range of
/// instructions to move.
- void splice(iterator Where, MachineBasicBlock *Other,
- iterator From, iterator To) {
+ void splice(iterator Where, MachineBasicBlock *Other, iterator From,
+ iterator To) {
Insts.splice(Where.getInstrIterator(), Other->Insts,
From.getInstrIterator(), To.getInstrIterator());
}
@@ -1251,7 +1247,7 @@ class MachineBasicBlock
bool IsStandalone = true) const;
enum PrintNameFlag {
- PrintNameIr = (1 << 0), ///< Add IR name where available
+ PrintNameIr = (1 << 0), ///< Add IR name where available
PrintNameAttributes = (1 << 1), ///< Print attributes
};
@@ -1275,6 +1271,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
@@ -1282,9 +1284,7 @@ class MachineBasicBlock
return IrrLoopHeaderWeight;
}
- void setIrrLoopHeaderWeight(uint64_t Weight) {
- IrrLoopHeaderWeight = Weight;
- }
+ void setIrrLoopHeaderWeight(uint64_t Weight) { IrrLoopHeaderWeight = Weight; }
/// Return probability of the edge from this block to MBB. This method should
/// NOT be called directly, but by using getEdgeProbability method from
@@ -1393,7 +1393,7 @@ static_assert(GraphHasNodeNumbers<const MachineBasicBlock *>,
// to be when traversing the predecessor edges of a MBB
// instead of the successor edges.
//
-template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<MachineBasicBlock *>> {
using NodeRef = MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::pred_iterator;
@@ -1413,7 +1413,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
static_assert(GraphHasNodeNumbers<Inverse<MachineBasicBlock *>>,
"GraphTraits getNumber() not detected");
-template <> struct GraphTraits<Inverse<const MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<const MachineBasicBlock *>> {
using NodeRef = const MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::const_pred_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..ab9fe82bc7917 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -78,9 +78,9 @@ class MachineInstr
/// otherwise easily derivable from the IR text.
///
enum CommentFlag {
- ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
+ ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
NoSchedComment = 0x2,
- TAsmComments = 0x4 // Target Asm comments should start from this value.
+ TAsmComments = 0x4 // Target Asm comments should start from this value.
};
enum MIFlag {
@@ -123,16 +123,17 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
- InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
+ InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
+ Prefetch = 1 << 23, // Instruction is a prefetch.
};
private:
- const MCInstrDesc *MCID; // Instruction descriptor.
- MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
+ const MCInstrDesc *MCID; // Instruction descriptor.
+ MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
// Operands are allocated by an ArrayRecycler.
- MachineOperand *Operands = nullptr; // Pointer to the first operand.
+ MachineOperand *Operands = nullptr; // Pointer to the first operand.
#define LLVM_MI_NUMOPERANDS_BITS 24
#define LLVM_MI_FLAGS_BITS 24
@@ -144,7 +145,7 @@ class MachineInstr
// OperandCapacity has uint8_t size, so it should be next to NumOperands
// to properly pack.
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
- OperandCapacity CapOperands; // Capacity of the Operands array.
+ OperandCapacity CapOperands; // Capacity of the Operands array.
/// Various bits of additional information about the machine instruction.
uint32_t Flags : LLVM_MI_FLAGS_BITS;
@@ -226,9 +227,8 @@ class MachineInstr
}
MDNode *getPCSections() const {
- return HasPCSections
- ? getTrailingObjects<MDNode *>()[HasHeapAllocMarker]
- : nullptr;
+ return HasPCSections ? getTrailingObjects<MDNode *>()[HasHeapAllocMarker]
+ : nullptr;
}
uint32_t getCFIType() const {
@@ -356,8 +356,8 @@ class MachineInstr
// Use MachineFunction::DeleteMachineInstr() instead.
~MachineInstr() = delete;
- const MachineBasicBlock* getParent() const { return Parent; }
- MachineBasicBlock* getParent() { return Parent; }
+ const MachineBasicBlock *getParent() const { return Parent; }
+ MachineBasicBlock *getParent() { return Parent; }
/// Move the instruction before \p MovePos.
LLVM_ABI void moveBefore(MachineInstr *MovePos);
@@ -401,9 +401,7 @@ class MachineInstr
}
/// Return the MI flags bitvector.
- uint32_t getFlags() const {
- return Flags;
- }
+ uint32_t getFlags() const { return Flags; }
/// Return whether an MI flag is set.
bool getFlag(MIFlag Flag) const {
@@ -475,15 +473,11 @@ class MachineInstr
/// ----------------
/// The first instruction has the special opcode "BUNDLE". It's not "inside"
/// a bundle, but the next three MIs are.
- bool isInsideBundle() const {
- return getFlag(BundledPred);
- }
+ bool isInsideBundle() const { return getFlag(BundledPred); }
/// Return true if this instruction part of a bundle. This is true
/// if either itself or its following instruction is marked "InsideBundle".
- bool isBundled() const {
- return isBundledWithPred() || isBundledWithSucc();
- }
+ bool isBundled() const { return isBundledWithPred() || isBundledWithSucc(); }
/// Return true if this instruction is part of a bundle, and it is not the
/// first instruction in the bundle.
@@ -882,9 +876,9 @@ class MachineInstr
/// queries but they are bundle aware.
enum QueryType {
- IgnoreBundle, // Ignore bundles
- AnyInBundle, // Return true if any instruction in bundle has property
- AllInBundle // Return true if all instructions in bundle have property
+ IgnoreBundle, // Ignore bundles
+ AnyInBundle, // Return true if any instruction in bundle has property
+ AllInBundle // Return true if all instructions in bundle have property
};
/// Return true if the instruction (or in the case of a bundle,
@@ -1010,8 +1004,8 @@ class MachineInstr
/// values. There are various methods in TargetInstrInfo that can be used to
/// control and modify the predicate in this instruction.
bool isPredicable(QueryType Type = AllInBundle) const {
- // If it's a bundle than all bundled instructions must be predicable for this
- // to return true.
+ // If it's a bundle then all bundled instructions must be predicable for
+ // this to return true.
return hasProperty(MCID::Predicable, Type);
}
@@ -1269,10 +1263,10 @@ class MachineInstr
}
enum MICheckType {
- CheckDefs, // Check all operands for equality
- CheckKillDead, // Check all operands including kill / dead markers
- IgnoreDefs, // Ignore all definitions
- IgnoreVRegDefs // Ignore virtual register definitions
+ CheckDefs, // Check all operands for equality
+ CheckKillDead, // Check all operands including kill / dead markers
+ IgnoreDefs, // Ignore all definitions
+ IgnoreVRegDefs // Ignore virtual register definitions
};
/// Return true if this instruction is identical to \p Other.
@@ -1399,7 +1393,9 @@ class MachineInstr
getOpcode() == TargetOpcode::G_PHI;
}
bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
- bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
+ bool isImplicitDef() const {
+ return getOpcode() == TargetOpcode::IMPLICIT_DEF;
+ }
bool isInlineAsm() const {
return getOpcode() == TargetOpcode::INLINEASM ||
getOpcode() == TargetOpcode::INLINEASM_BR;
@@ -1424,13 +1420,9 @@ class MachineInstr
return getOpcode() == TargetOpcode::REG_SEQUENCE;
}
- bool isBundle() const {
- return getOpcode() == TargetOpcode::BUNDLE;
- }
+ bool isBundle() const { return getOpcode() == TargetOpcode::BUNDLE; }
- bool isCopy() const {
- return getOpcode() == TargetOpcode::COPY;
- }
+ bool isCopy() const { return getOpcode() == TargetOpcode::COPY; }
bool isFullCopy() const {
return isCopy() && !getOperand(0).getSubReg() && !getOperand(1).getSubReg();
@@ -1444,14 +1436,12 @@ class MachineInstr
/// Return true if the instruction behaves like a copy.
/// This does not include native copy instructions.
- bool isCopyLike() const {
- return isCopy() || isSubregToReg();
- }
+ bool isCopyLike() const { return isCopy() || isSubregToReg(); }
/// Return true is the instruction is an identity copy.
bool isIdentityCopy() const {
return isCopy() && getOperand(0).getReg() == getOperand(1).getReg() &&
- getOperand(0).getSubReg() == getOperand(1).getSubReg();
+ getOperand(0).getSubReg() == getOperand(1).getSubReg();
}
/// Return true if this is a transient instruction that is either very likely
@@ -2067,12 +2057,12 @@ class MachineInstr
/// Unlink all of the register operands in this instruction from their
/// respective use lists. This requires that the operands already be on their
/// use lists.
- void removeRegOperandsFromUseLists(MachineRegisterInfo&);
+ void removeRegOperandsFromUseLists(MachineRegisterInfo &);
/// Add all of the register operands in this instruction from their
/// respective use lists. This requires that the operands not be on their
/// use lists yet.
- void addRegOperandsToUseLists(MachineRegisterInfo&);
+ void addRegOperandsToUseLists(MachineRegisterInfo &);
/// Slow path for hasProperty when we're dealing with a bundle.
LLVM_ABI bool hasPropertyInBundle(uint64_t Mask, QueryType Type) const;
@@ -2096,19 +2086,17 @@ class MachineInstr
/// instruction rather than by pointer value.
/// The hashing and equality testing functions ignore definitions so this is
/// useful for CSE, etc.
-struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr*> {
- static inline MachineInstr *getEmptyKey() {
- return nullptr;
- }
+struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr *> {
+ static inline MachineInstr *getEmptyKey() { return nullptr; }
static inline MachineInstr *getTombstoneKey() {
- return reinterpret_cast<MachineInstr*>(-1);
+ return reinterpret_cast<MachineInstr *>(-1);
}
LLVM_ABI static unsigned getHashValue(const MachineInstr *const &MI);
- static bool isEqual(const MachineInstr* const &LHS,
- const MachineInstr* const &RHS) {
+ static bool isEqual(const MachineInstr *const &LHS,
+ const MachineInstr *const &RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
LHS == getEmptyKey() || LHS == getTombstoneKey())
return LHS == RHS;
@@ -2119,7 +2107,7 @@ struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr*> {
//===----------------------------------------------------------------------===//
// Debugging Support
-inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) {
+inline raw_ostream &operator<<(raw_ostream &OS, const MachineInstr &MI) {
MI.print(OS);
return OS;
}
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index c8196d8a7ef48..fb7e92d056c27 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializePrefetchInsertionPass(PassRegistry &);
LLVM_ABI void
initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f65d88a669f13..1b06e0390603b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,6 +119,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> InsertNoopsForPrefetch(
+ "insert-noops-for-prefetch",
+ cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+ cl::Hidden);
+
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -199,9 +205,7 @@ class AddrLabelMapCallbackPtr final : CallbackVH {
AddrLabelMapCallbackPtr() = default;
AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {}
- void setPtr(BasicBlock *BB) {
- ValueHandleBase::operator=(BB);
- }
+ void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); }
void setMap(AddrLabelMap *map) { Map = map; }
@@ -639,7 +643,8 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
case ExceptionHandling::WinEH:
switch (MAI->getWinEHEncodingType()) {
- default: llvm_unreachable("unsupported unwinding information encoding");
+ default:
+ llvm_unreachable("unsupported unwinding information encoding");
case WinEH::EncodingType::Invalid:
break;
case WinEH::EncodingType::X86:
@@ -697,7 +702,7 @@ void AsmPrinter::emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
} else if (MAI->avoidWeakIfComdat() && GV->hasComdat()) {
// .globl _foo
OutStreamer->emitSymbolAttribute(GVSym, MCSA_Global);
- //NOTE: linkonce is handled by the section the symbol was assigned to.
+ // NOTE: linkonce is handled by the section the symbol was assigned to.
} else {
// .weak _foo
OutStreamer->emitSymbolAttribute(GVSym, MCSA_Weak);
@@ -734,7 +739,8 @@ MCSymbol *AsmPrinter::getSymbolPreferLocal(const GlobalValue &GV) const {
// assembler would otherwise be conservative and assume a global default
// visibility symbol can be interposable, even if the code generator already
// assumed it.
- if (TM.getTargetTriple().isOSBinFormatELF() && GV.canBenefitFromLocalAlias()) {
+ if (TM.getTargetTriple().isOSBinFormatELF() &&
+ GV.canBenefitFromLocalAlias()) {
const Module &M = *GV.getParent();
if (TM.getRelocationModel() != Reloc::Static &&
M.getPIELevel() == PIELevel::Default && GV.isDSOLocal())
@@ -791,7 +797,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->emitSymbolAttribute(EmittedSym, MCSA_Memtag);
}
- if (!GV->hasInitializer()) // External globals require no extra code.
+ if (!GV->hasInitializer()) // External globals require no extra code.
return;
GVSym->redefineIfPossible();
@@ -817,7 +823,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// Handle common symbols
if (GVKind.isCommon()) {
- if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+ if (Size == 0)
+ Size = 1; // .comm Foo, 0 is undefined, avoid it.
// .comm _foo, 42, 4
OutStreamer->emitCommonSymbol(GVSym, Size, Alignment);
return;
@@ -887,8 +894,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
emitAlignment(Alignment, GV);
OutStreamer->emitLabel(MangSym);
- emitGlobalConstant(GV->getDataLayout(),
- GV->getInitializer());
+ emitGlobalConstant(GV->getDataLayout(), GV->getInitializer());
}
OutStreamer->addBlankLine();
@@ -907,7 +913,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
// - pointer to mangled symbol above with initializer
unsigned PtrSize = DL.getPointerTypeSize(GV->getType());
OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
- PtrSize);
+ PtrSize);
OutStreamer->emitIntValue(0, PtrSize);
OutStreamer->emitSymbolValue(MangSym, PtrSize);
@@ -1063,7 +1069,7 @@ void AsmPrinter::emitFunctionHeader() {
// If the function had address-taken blocks that got deleted, then we have
// references to the dangling symbols. Emit them at the start of the function
// so that we don't get references to undefined symbols.
- std::vector<MCSymbol*> DeadBlockSyms;
+ std::vector<MCSymbol *> DeadBlockSyms;
takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
@@ -1075,7 +1081,7 @@ void AsmPrinter::emitFunctionHeader() {
MCSymbol *CurPos = OutContext.createTempSymbol();
OutStreamer->emitLabel(CurPos);
OutStreamer->emitAssignment(CurrentFnBegin,
- MCSymbolRefExpr::create(CurPos, OutContext));
+ MCSymbolRefExpr::create(CurPos, OutContext));
} else {
OutStreamer->emitLabel(CurrentFnBegin);
}
@@ -1394,7 +1400,7 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
// Emit a symbol assignment.
OutStreamer->emitAssignment(FrameAllocSym,
- MCConstantExpr::create(FrameOffset, OutContext));
+ MCConstantExpr::create(FrameOffset, OutContext));
}
/// Returns the BB metadata to be emitted in the SHT_LLVM_BB_ADDR_MAP section
@@ -1983,10 +1989,38 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
+ int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
+ unsigned NumCallsInBlock = 0;
for (auto &MI : MBB) {
+ if (NextPrefetchTargetIndex != -1 &&
+ NumCallsInBlock >=
+ MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
+
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+ utostr(MBB.getBBID()->BaseID) + Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().isWeakForLinker()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
+ errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName()
+ << "\n";
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ errs() << "Emitting global symbol: "
+ << PrefetchTargetSymbol->getName() << "\n";
+ }
+ // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() <<
+ // "\n";
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2061,10 +2095,12 @@ void AsmPrinter::emitFunctionBody() {
}
break;
case TargetOpcode::IMPLICIT_DEF:
- if (isVerbose()) emitImplicitDef(&MI);
+ if (isVerbose())
+ emitImplicitDef(&MI);
break;
case TargetOpcode::KILL:
- if (isVerbose()) emitKill(&MI, *this);
+ if (isVerbose())
+ emitKill(&MI, *this);
break;
case TargetOpcode::FAKE_USE:
if (isVerbose())
@@ -2089,7 +2125,11 @@ void AsmPrinter::emitFunctionBody() {
// actual initialization is needed.
break;
default:
- emitInstruction(&MI);
+ if (MI.getFlag(MachineInstr::Prefetch) && InsertNoopsForPrefetch) {
+ OutStreamer->emitNops(7, 7, SMLoc(), getSubtargetInfo());
+ } else {
+ emitInstruction(&MI);
+ }
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
@@ -2126,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
+ while (NextPrefetchTargetIndex != -1) {
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+ utostr(MBB.getBBID()->BaseID) + Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().hasWeakLinkage()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol,
+ MCSA_WeakDefinition);
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ }
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
// We must emit a temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic block marks the end of a
@@ -2865,7 +2923,7 @@ bool AsmPrinter::doFinalization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
- for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+ for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E;)
if (GCMetadataPrinter *MP = getOrCreateGCPrinter(**--I))
MP->finishAssembly(M, *MI, *this);
@@ -3013,13 +3071,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
namespace {
// Keep track the alignment, constpool entries per Section.
- struct SectionCPs {
- MCSection *S;
- Align Alignment;
- SmallVector<unsigned, 4> CPEs;
+struct SectionCPs {
+ MCSection *S;
+ Align Alignment;
+ SmallVector<unsigned, 4> CPEs;
- SectionCPs(MCSection *s, Align a) : S(s), Alignment(a) {}
- };
+ SectionCPs(MCSection *s, Align a) : S(s), Alignment(a) {}
+};
} // end anonymous namespace
@@ -3037,7 +3095,8 @@ StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {
void AsmPrinter::emitConstantPool() {
const MachineConstantPool *MCP = MF->getConstantPool();
const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
- if (CP.empty()) return;
+ if (CP.empty())
+ return;
// Calculate sections for constant pool entries. We collect entries to go into
// the same section together to reduce amount of section switch statements.
@@ -3112,10 +3171,12 @@ void AsmPrinter::emitConstantPool() {
// function.
void AsmPrinter::emitJumpTableInfo() {
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (!MJTI) return;
+ if (!MJTI)
+ return;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return;
+ if (JT.empty())
+ return;
if (!TM.Options.EnableStaticDataPartitioning) {
emitJumpTableImpl(*MJTI, llvm::to_vector(llvm::seq<unsigned>(JT.size())));
@@ -3333,7 +3394,7 @@ void AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo &MJTI,
/// do nothing and return false.
bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
- if (MAI->hasNoDeadStrip()) // No need to emit this at all.
+ if (MAI->hasNoDeadStrip()) // No need to emit this at all.
emitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
return true;
}
@@ -3376,7 +3437,8 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) {
return true;
}
- if (!GV->hasAppendingLinkage()) return false;
+ if (!GV->hasAppendingLinkage())
+ return false;
assert(GV->hasInitializer() && "Not a special LLVM global!");
@@ -3406,7 +3468,7 @@ void AsmPrinter::emitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
const GlobalValue *GV =
- dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+ dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
if (GV)
OutStreamer->emitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip);
}
@@ -3820,7 +3882,8 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
assert(!Data.empty() && "Empty aggregates should be CAZ node");
char C = Data[0];
for (unsigned i = 1, e = Data.size(); i != e; ++i)
- if (Data[i] != C) return -1;
+ if (Data[i] != C)
+ return -1;
return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
}
@@ -3965,7 +4028,8 @@ static void emitGlobalConstantVector(const DataLayout &DL, const Constant *CV,
EmittedSize = DL.getTypeStoreSize(CV->getType());
} else {
for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
- emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I, AliasList);
+ emitGlobalAliasInline(AP, DL.getTypeAllocSize(CV->getType()) * I,
+ AliasList);
emitGlobalConstantImpl(DL, CV->getAggregateElement(I), AP);
}
EmittedSize = DL.getTypeAllocSize(ElementType) * VTy->getNumElements();
@@ -4083,8 +4147,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
// ExtraBits 0 1 (BitWidth / 64) - 1
// chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN]
ExtraBitsSize = alignTo(ExtraBitsSize, 8);
- ExtraBits = Realigned.getRawData()[0] &
- (((uint64_t)-1) >> (64 - ExtraBitsSize));
+ ExtraBits =
+ Realigned.getRawData()[0] & (((uint64_t)-1) >> (64 - ExtraBitsSize));
if (BitWidth >= 64)
Realigned.lshrInPlace(ExtraBitsSize);
} else
@@ -4107,8 +4171,9 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
uint64_t Size = AP.getDataLayout().getTypeStoreSize(CI->getType());
Size -= (BitWidth / 64) * 8;
assert(Size && Size * 8 >= ExtraBitsSize &&
- (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
- == ExtraBits && "Directive too small for extra bits.");
+ (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) ==
+ ExtraBits &&
+ "Directive too small for extra bits.");
AP.OutStreamer->emitIntValue(ExtraBits, Size);
}
}
@@ -4430,12 +4495,13 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(const Twine &Sym) const {
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
- if (!Loop) return;
+ if (!Loop)
+ return;
PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
- OS.indent(Loop->getLoopDepth()*2)
- << "Parent Loop BB" << FunctionNumber << "_"
- << Loop->getHeader()->getNumber()
- << " Depth=" << Loop->getLoopDepth() << '\n';
+ OS.indent(Loop->getLoopDepth() * 2)
+ << "Parent Loop BB" << FunctionNumber << "_"
+ << Loop->getHeader()->getNumber() << " Depth=" << Loop->getLoopDepth()
+ << '\n';
}
/// PrintChildLoopComment - Print comments about child loops within
@@ -4444,10 +4510,10 @@ static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
// Add child loop information
for (const MachineLoop *CL : *Loop) {
- OS.indent(CL->getLoopDepth()*2)
- << "Child Loop BB" << FunctionNumber << "_"
- << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth()
- << '\n';
+ OS.indent(CL->getLoopDepth() * 2)
+ << "Child Loop BB" << FunctionNumber << "_"
+ << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth()
+ << '\n';
PrintChildLoopComment(OS, CL, FunctionNumber);
}
}
@@ -4458,7 +4524,8 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
const AsmPrinter &AP) {
// Add loop depth information
const MachineLoop *Loop = LI->getLoopFor(&MBB);
- if (!Loop) return;
+ if (!Loop)
+ return;
MachineBasicBlock *Header = Loop->getHeader();
assert(Header && "No header for loop");
@@ -4467,9 +4534,9 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
// and return.
if (Header != &MBB) {
AP.OutStreamer->AddComment(" in Loop: Header=BB" +
- Twine(AP.getFunctionNumber())+"_" +
- Twine(Loop->getHeader()->getNumber())+
- " Depth="+Twine(Loop->getLoopDepth()));
+ Twine(AP.getFunctionNumber()) + "_" +
+ Twine(Loop->getHeader()->getNumber()) +
+ " Depth=" + Twine(Loop->getLoopDepth()));
return;
}
@@ -4480,7 +4547,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
OS << "=>";
- OS.indent(Loop->getLoopDepth()*2-2);
+ OS.indent(Loop->getLoopDepth() * 2 - 2);
OS << "This ";
if (Loop->isInnermost())
@@ -4601,7 +4668,8 @@ void AsmPrinter::emitVisibility(MCSymbol *Sym, unsigned Visibility,
MCSymbolAttr Attr = MCSA_Invalid;
switch (Visibility) {
- default: break;
+ default:
+ break;
case GlobalValue::HiddenVisibility:
if (IsDefinition)
Attr = MAI->getHiddenVisibilityAttr();
@@ -4636,8 +4704,8 @@ bool AsmPrinter::shouldEmitLabelForBasicBlock(
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
-bool AsmPrinter::
-isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+bool AsmPrinter::isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const {
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
if (MBB->isEHPad() || MBB->pred_empty())
@@ -4840,7 +4908,7 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
auto Attr = F.getFnAttribute("function-instrument");
bool LogArgs = F.hasFnAttribute("xray-log-args");
bool AlwaysInstrument =
- Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
Kind = SledKind::LOG_ARGS_ENTER;
Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind,
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index e317e1c06741f..89bfa8a5ebb6f 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+ // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+ // nullptr;
BasicBlockSections() : MachineFunctionPass(ID) {
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
@@ -305,8 +306,7 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) {
// clusters of basic blocks using basic block ids. Source drift can
// invalidate these groupings leading to sub-optimal code generation with
// regards to performance.
- if (BBSectionsType == BasicBlockSection::List &&
- hasInstrProfHashMismatch(MF))
+ if (BBSectionsType == BasicBlockSection::List && hasInstrProfHashMismatch(MF))
return false;
// Renumber blocks before sorting them. This is useful for accessing the
// original layout positions and finding the original fallthroughs.
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 485b44ae4c4aa..9127ddfec4085 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -65,9 +65,9 @@ std::pair<bool, SmallVector<BBClusterInfo>>
BasicBlockSectionsProfileReader::getClusterInfoForFunction(
StringRef FuncName) const {
auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
- return R != ProgramPathAndClusterInfo.end()
- ? std::pair(true, R->second.ClusterInfo)
- : std::pair(false, SmallVector<BBClusterInfo>());
+ if (R == ProgramPathAndClusterInfo.end() || R->second.ClusterInfo.empty())
+ return std::pair(false, SmallVector<BBClusterInfo>());
+ return std::pair(true, R->second.ClusterInfo);
}
SmallVector<SmallVector<unsigned>>
@@ -91,6 +91,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+ .PrefetchTargets;
+}
+
// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
@@ -306,6 +319,61 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 'i': { // Prefetch hint specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ assert(Values.size() == 2);
+ SmallVector<StringRef, 2> PrefetchSiteStr;
+ Values[0].split(PrefetchSiteStr, '@');
+ assert(PrefetchSiteStr.size() == 2);
+ auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+ if (!SiteBBID)
+ return SiteBBID.takeError();
+ unsigned long long SiteBBOffset;
+ if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchSiteStr[1]);
+
+ SmallVector<StringRef, 3> PrefetchTargetStr;
+ Values[1].split(PrefetchTargetStr, '@');
+ assert(PrefetchTargetStr.size() == 3);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetBBOffset;
+ if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[2]);
+ // errs() << "Read it " << " " << SiteBBOffset << " " <<
+ // PrefetchTargetStr[0] << " " <<TargetBBOffset << "\n";
+ FI->second.PrefetchHints.push_back(
+ PrefetchHint{{*SiteBBID, static_cast<unsigned>(SiteBBOffset)},
+ PrefetchTargetStr[0],
+ {*TargetBBID, static_cast<unsigned>(TargetBBOffset)}});
+ continue;
+ }
+ case 'j': { // Prefetch target specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ assert(Values.size() == 1);
+ SmallVector<StringRef, 2> PrefetchTargetStr;
+ Values[0].split(PrefetchTargetStr, '@');
+ assert(PrefetchTargetStr.size() == 2);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetBBOffset;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[1]);
+ FI->second.PrefetchTargets.insert(
+ BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
@@ -512,6 +580,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0309e225d9df4..a3c28b3128f50 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
@@ -3837,11 +3838,11 @@ class AddressingModeMatcher {
TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
- : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
- DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
- AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
- InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
- LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getDataLayout()), LI(LI),
+ getDTFn(getDTFn), AccessTy(AT), AddrSpace(AS), MemoryInst(MI),
+ AddrMode(AM), InsertedInsts(InsertedInsts),
+ PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
+ OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -4478,8 +4479,8 @@ class AddressingModeCombiner {
// It must be a Phi node then.
PHINode *CurrentPhi = cast<PHINode>(Current);
unsigned PredCount = CurrentPhi->getNumIncomingValues();
- PHINode *PHI =
- PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
+ PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
+ CurrentPhi->getIterator());
Map[Current] = PHI;
ST.insertNewPhi(PHI);
append_range(Worklist, CurrentPhi->incoming_values());
@@ -4791,7 +4792,7 @@ class TypePromotionHelper {
public:
/// Type for the utility function that promotes the operand of Ext.
- using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
+ using Action = Value *(*)(Instruction * Ext, TypePromotionTransaction &TPT,
InstrToOrigTy &PromotedInsts,
unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
@@ -5176,9 +5177,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// Try to match an integer constant second to increase its chance of ending
// up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
int First = 0, Second = 1;
- if (isa<ConstantInt>(AddrInst->getOperand(First))
- && !isa<ConstantInt>(AddrInst->getOperand(Second)))
- std::swap(First, Second);
+ if (isa<ConstantInt>(AddrInst->getOperand(First)) &&
+ !isa<ConstantInt>(AddrInst->getOperand(Second)))
+ std::swap(First, Second);
AddrMode.InBounds = false;
if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
matchAddr(AddrInst->getOperand(Second), Depth + 1))
@@ -5262,32 +5263,32 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
- if (!cast<GEPOperator>(AddrInst)->isInBounds())
- AddrMode.InBounds = false;
- return true;
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
+ return true;
}
AddrMode.BaseOffs -= ConstantOffset;
if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
ConstantOffset > 0) {
- // Record GEPs with non-zero offsets as candidates for splitting in
- // the event that the offset cannot fit into the r+i addressing mode.
- // Simple and common case that only one GEP is used in calculating the
- // address for the memory access.
- Value *Base = AddrInst->getOperand(0);
- auto *BaseI = dyn_cast<Instruction>(Base);
- auto *GEP = cast<GetElementPtrInst>(AddrInst);
- if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
- (BaseI && !isa<CastInst>(BaseI) &&
- !isa<GetElementPtrInst>(BaseI))) {
- // Make sure the parent block allows inserting non-PHI instructions
- // before the terminator.
- BasicBlock *Parent = BaseI ? BaseI->getParent()
- : &GEP->getFunction()->getEntryBlock();
- if (!Parent->getTerminator()->isEHPad())
+ // Record GEPs with non-zero offsets as candidates for splitting in
+ // the event that the offset cannot fit into the r+i addressing mode.
+ // Simple and common case that only one GEP is used in calculating the
+ // address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
+ BasicBlock *Parent =
+ BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
+ if (!Parent->getTerminator()->isEHPad())
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
- }
+ }
}
return false;
@@ -5622,7 +5623,6 @@ static bool FindAllMemoryUses(
PSI, BFI, SeenInsts);
}
-
/// Return true if Val is already known to be live at the use site that we're
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..cf0f47beb4c03 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+  if (CallInstSymbols.size() <= CallInstNumber)
+    CallInstSymbols.resize(CallInstNumber + 1);
+  if (!CallInstSymbols[CallInstNumber]) {
+    const MachineFunction *MF = getParent();
+    MCContext &Ctx = MF->getContext();
+    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
+        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+            Twine(CallInstNumber),
+        /*AlwaysEmit=*/true);
+  }
+  return CallInstSymbols[CallInstNumber];
+}
+
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
@@ -211,8 +224,8 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition() ||
- TII->isBasicBlockPrologue(*I)))
+ while (I != E &&
+ (I->isPHI() || I->isPosition() || TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels
// inside the bundle.
@@ -296,9 +309,7 @@ bool MachineBasicBlock::isEntryBlock() const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void MachineBasicBlock::dump() const {
- print(dbgs());
-}
+LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { print(dbgs()); }
#endif
bool MachineBasicBlock::mayHaveInlineAsmBr() const {
@@ -378,7 +389,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
// Print the preds of this block according to the CFG.
if (!pred_empty() && IsStandalone) {
- if (Indexes) OS << '\t';
+ if (Indexes)
+ OS << '\t';
// Don't indent(2), align with previous line attributes.
OS << "; predecessors: ";
ListSeparator LS;
@@ -389,7 +401,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (!succ_empty()) {
- if (Indexes) OS << '\t';
+ if (Indexes)
+ OS << '\t';
// Print the successors
OS.indent(2) << "successors: ";
ListSeparator LS;
@@ -420,7 +433,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (!livein_empty() && MRI.tracksLiveness()) {
- if (Indexes) OS << '\t';
+ if (Indexes)
+ OS << '\t';
OS.indent(2) << "liveins: ";
ListSeparator LS;
@@ -463,7 +477,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS.indent(2) << "}\n";
if (IrrLoopHeaderWeight && IsStandalone) {
- if (Indexes) OS << '\t';
+ if (Indexes)
+ OS << '\t';
OS.indent(2) << "; Irreducible loop header weight: " << *IrrLoopHeaderWeight
<< '\n';
}
@@ -659,8 +674,8 @@ void MachineBasicBlock::sortUniqueLiveIns() {
LiveIns.erase(Out, LiveIns.end());
}
-Register
-MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
+Register MachineBasicBlock::addLiveIn(MCRegister PhysReg,
+ const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
assert(PhysReg.isPhysical() && "Expected physreg");
assert(RC && "Register class is required");
@@ -674,7 +689,7 @@ MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC)
// Look for an existing copy.
if (LiveIn)
- for (;I != E && I->isCopy(); ++I)
+ for (; I != E && I->isCopy(); ++I)
if (I->getOperand(1).getReg() == PhysReg) {
Register VirtReg = I->getOperand(0).getReg();
if (!MRI.constrainRegClass(VirtReg, RC))
@@ -685,7 +700,7 @@ MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC)
// No luck, create a virtual register.
Register VirtReg = MRI.createVirtualRegister(RC);
BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg)
- .addReg(PhysReg, RegState::Kill);
+ .addReg(PhysReg, RegState::Kill);
if (!LiveIn)
addLiveIn(PhysReg);
return VirtReg;
@@ -722,7 +737,7 @@ void MachineBasicBlock::updateTerminator(
SmallVector<MachineOperand, 4> Cond;
DebugLoc DL = findBranchDebugLoc();
bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
- (void) B;
+ (void)B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
if (Cond.empty()) {
if (TBB) {
@@ -951,8 +966,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
}
}
-void
-MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
+void MachineBasicBlock::transferSuccessorsAndUpdatePHIs(
+ MachineBasicBlock *FromMBB) {
if (this == FromMBB)
return;
@@ -1019,7 +1034,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
}
// If there is no branch, control always falls through.
- if (!TBB) return &*Fallthrough;
+ if (!TBB)
+ return &*Fallthrough;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
@@ -1029,16 +1045,15 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
// If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
- if (Cond.empty()) return nullptr;
+ if (Cond.empty())
+ return nullptr;
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
return (FBB == nullptr) ? &*Fallthrough : nullptr;
}
-bool MachineBasicBlock::canFallThrough() {
- return getFallThrough() != nullptr;
-}
+bool MachineBasicBlock::canFallThrough() { return getFallThrough() != nullptr; }
MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
bool UpdateLiveIns,
@@ -1312,7 +1327,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// will extend to the end of the new split block.
bool isLastMBB =
- std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
+ std::next(MachineFunction::iterator(NMBB)) == getParent()->end();
SlotIndex StartIndex = Indexes->getMBBEndIdx(this);
SlotIndex PrevIndex = StartIndex.getPrevSlot();
@@ -1320,11 +1335,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// Find the registers used from NMBB in PHIs in Succ.
SmallSet<Register, 8> PHISrcRegs;
- for (MachineBasicBlock::instr_iterator
- I = Succ->instr_begin(), E = Succ->instr_end();
+ for (MachineBasicBlock::instr_iterator I = Succ->instr_begin(),
+ E = Succ->instr_end();
I != E && I->isPHI(); ++I) {
for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
- if (I->getOperand(ni+1).getMBB() == NMBB) {
+ if (I->getOperand(ni + 1).getMBB() == NMBB) {
MachineOperand &MO = I->getOperand(ni);
Register Reg = MO.getReg();
PHISrcRegs.insert(Reg);
@@ -1488,8 +1503,8 @@ MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) {
return Insts.remove(MI);
}
-MachineBasicBlock::instr_iterator
-MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) {
+MachineBasicBlock::instr_iterator MachineBasicBlock::insert(instr_iterator I,
+ MachineInstr *MI) {
assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() &&
"Cannot insert instruction with bundle flags");
// Set the bundle flags when inserting inside a bundle.
@@ -1523,7 +1538,8 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock::instr_iterator I = instr_end();
while (I != instr_begin()) {
--I;
- if (!I->isTerminator()) break;
+ if (!I->isTerminator())
+ break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -1548,8 +1564,7 @@ void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
/// Find the next valid DebugLoc starting at MBBI, skipping any debug
/// instructions. Return UnknownLoc if there is none.
-DebugLoc
-MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
+DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
// Skip debug declarations, we don't want a DebugLoc from them.
MBBI = skipDebugInstructionsForward(MBBI, instr_end());
if (MBBI != instr_end())
@@ -1591,8 +1606,7 @@ DebugLoc MachineBasicBlock::rfindPrevDebugLoc(reverse_instr_iterator MBBI) {
/// Find and return the merged DebugLoc of the branch instructions of the block.
/// Return UnknownLoc if there is none.
-DebugLoc
-MachineBasicBlock::findBranchDebugLoc() {
+DebugLoc MachineBasicBlock::findBranchDebugLoc() {
DebugLoc DL;
auto TI = getFirstTerminator();
while (TI != end() && !TI->isBranch())
@@ -1600,7 +1614,7 @@ MachineBasicBlock::findBranchDebugLoc() {
if (TI != end()) {
DL = TI->getDebugLoc();
- for (++TI ; TI != end() ; ++TI)
+ for (++TI; TI != end(); ++TI)
if (TI->isBranch())
DL = DebugLoc::getMergedLocation(DL, TI->getDebugLoc());
}
@@ -1682,7 +1696,8 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
/// instructions after (searching just for defs) MI.
MachineBasicBlock::LivenessQueryResult
MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
- MCRegister Reg, const_iterator Before,
+ MCRegister Reg,
+ const_iterator Before,
unsigned Neighborhood) const {
unsigned N = Neighborhood;
@@ -1717,7 +1732,6 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
return LQR_Dead;
}
-
N = Neighborhood;
// Start by searching backwards from Before, looking for kills, reads or defs.
@@ -1792,9 +1806,7 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
}
-void MachineBasicBlock::clearLiveIns() {
- LiveIns.clear();
-}
+void MachineBasicBlock::clearLiveIns() { LiveIns.clear(); }
void MachineBasicBlock::clearLiveIns(
std::vector<RegisterMaskPair> &OldLiveIns) {
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index f9bd233cf8ecf..522532aa9be5d 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -58,6 +58,7 @@ set(sources
 X86IndirectThunks.cpp
 X86InterleavedAccess.cpp
 X86InsertPrefetch.cpp
+ PrefetchInsertion.cpp
 X86InstCombineIntrinsic.cpp
 X86InstrFMA3Info.cpp
 X86InstrFoldTables.cpp
diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp
new file mode 100644
index 0000000000000..720a38cb9b011
--- /dev/null
+++ b/llvm/lib/Target/X86/PrefetchInsertion.cpp
@@ -0,0 +1,209 @@
+//===-- PrefetchInsertion.cpp - Propeller-guided prefetch insertion ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass inserts code prefetch instructions on X86, guided by prefetch
+/// hints from a Propeller profile.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include <map>
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetch-insertion"
+
+static cl::opt<bool> UseCodePrefetchInstruction(
+    "use-code-prefetch-instruction",
+    cl::desc("Use the prefetchit1 code-prefetch instruction rather than the "
+             "prefetcht1 data-prefetch instruction."),
+    cl::init(true), cl::Hidden);
+static cl::opt<bool> PrefetchNextAddress(
+ "prefetch-next-address",
+ cl::desc(
+ "Whether to prefetch the next address instead of the target address."),
+ cl::init(false), cl::Hidden);
+
+namespace llvm {
+class PrefetchInsertion : public MachineFunctionPass {
+public:
+ static char ID;
+
+ BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+
+ PrefetchInsertion() : MachineFunctionPass(ID) {
+ initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Insert prefetch instructions into the function according to the prefetch
+  /// hints read from the Propeller profile.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+
+char PrefetchInsertion::ID = 0;
+INITIALIZE_PASS_BEGIN(PrefetchInsertion, "prefetch-insertion",
+                      "Insert code prefetches based on the Propeller profile",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(PrefetchInsertion, "prefetch-insertion",
+                    "Insert code prefetches based on the Propeller profile",
+                    false, false)
+
+bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+  LLVM_DEBUG(dbgs() << "Running on " << MF.getName() << "\n");
+ Function &F = MF.getFunction();
+ auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
+ DenseSet<BBPosition> PrefetchTargets =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchTargetsForFunction(MF.getName());
+  LLVM_DEBUG(dbgs() << "Function " << F.getName() << " has "
+                    << PrefetchTargets.size() << " prefetch targets\n");
+ DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+ for (const auto &P : PrefetchTargets)
+ PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
+ for (auto &[BBID, V] : PrefetchTargetsByBBID)
+ llvm::sort(V);
+ for (auto &BB : MF)
+ BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
+
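+  // Make sure a global named __llvm_prefetch_target_<function>_<bbid>_<offset>
+  // exists for every prefetch target in this function.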
+ for (const BBPosition &P : PrefetchTargets) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += F.getName();
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(P.BBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(P.BBOffset);
+ F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+ }
+
+ SmallVector<PrefetchHint> PrefetchHints =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchHintsForFunction(MF.getName());
+  LLVM_DEBUG(dbgs() << "Function " << F.getName() << " has "
+                    << PrefetchHints.size() << " prefetch hints\n");
+ for (const PrefetchHint &H : PrefetchHints) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += H.TargetFunctionName;
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
+ F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+ }
+
+ DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
+ PrefetchHintsByBBID;
+ for (const auto &H : PrefetchHints) {
+ PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
+ PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
+ H.TargetPosition.BBOffset});
+ }
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
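+  // Walk each block, counting call sites; once the running count reaches a
+  // hint's call-site offset (or we hit the first terminator), insert that
+  // hint's prefetches before the current instruction.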
+ for (auto &BB : MF) {
+ auto It = PrefetchHintsByBBID.find(*BB.getBBID());
+ if (It == PrefetchHintsByBBID.end())
+ continue;
+ auto BBPrefetchHintIt = It->second.begin();
+ unsigned NumInsts = 0;
+ auto E = BB.getFirstTerminator();
+ unsigned NumCallsites = 0;
+ for (auto I = BB.instr_begin();;) {
+ auto Current = I;
+ if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
+ for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += PrefetchTarget.TargetFunction;
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
+ auto *GV =
+ MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
+          LLVM_DEBUG(dbgs() << "Inserting prefetch for " << GV->getName()
+                            << " at " << MF.getName() << " " << BB.getName()
+                            << " " << NumInsts << "\n");
+ MachineInstr *PFetch = MF.CreateMachineInstr(
+ UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
+ : TII->get(X86::PREFETCHT1),
+ Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
+ true);
+ PFetch->setFlag(MachineInstr::Prefetch);
+ MachineInstrBuilder MIB(MF, PFetch);
+ if (!PrefetchNextAddress) {
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
+ /*base_alignment=*/llvm::Align(1)));
+ }
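+          // X86 memory-operand layout: base (RIP), scale, index, displacement,
+          // segment. The displacement is the target symbol, or 0 when
+          // prefetching the next address.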
+ MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+ if (PrefetchNextAddress)
+ MIB.addImm(0);
+ else
+ MIB.addGlobalAddress(GV);
+ MIB.addReg(X86::NoRegister);
+ BB.insert(Current, PFetch);
+ }
+ ++BBPrefetchHintIt;
+ if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
+ break;
+ }
+ if (Current != E) {
+        // Count only instructions that will emit machine code.
+ if (!Current->isPosition() && !Current->isImplicitDef() &&
+ !Current->isKill() && !Current->isDebugInstr()) {
+ ++NumInsts;
+ }
+ if (Current->isCall())
+ ++NumCallsites;
+ ++I;
+ }
+ }
+ }
+ return true;
+}
+
+void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createPrefetchInsertionPass() {
+ return new PrefetchInsertion();
+}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 51b540a7a51d0..5fd0eb8f4f594 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -76,7 +76,8 @@ FunctionPass *createX86OptimizeLEAs();
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
FunctionPass *createX86FixupSetCC();
-/// Return a pass that avoids creating store forward block issues in the hardware.
+/// Return a pass that avoids creating store-forwarding block issues in the
+/// hardware.
FunctionPass *createX86AvoidStoreForwardingBlocks();
/// Return a pass that lowers EFLAGS copy pseudo instructions.
@@ -134,8 +135,8 @@ FunctionPass *createX86FixupBWInsts();
/// to another, when profitable.
FunctionPass *createX86DomainReassignmentPass();
-/// This pass compress instructions from EVEX space to legacy/VEX/EVEX space when
-/// possible in order to reduce code size or facilitate HW decoding.
+/// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
+/// when possible in order to reduce code size or facilitate HW decoding.
FunctionPass *createX86CompressEVEXPass();
/// This pass creates the thunks for the retpoline feature.
@@ -151,6 +152,8 @@ FunctionPass *createX86DiscriminateMemOpsPass();
/// This pass applies profiling information to insert cache prefetches.
FunctionPass *createX86InsertPrefetchPass();
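+/// This pass inserts code prefetches guided by the Propeller profile.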
+FunctionPass *createPrefetchInsertionPass();
+
/// This pass insert wait instruction after X87 instructions which could raise
/// fp exceptions when strict-fp enabled.
FunctionPass *createX86InsertX87waitPass();
@@ -238,8 +241,8 @@ enum : unsigned {
PTR32_UPTR = 271,
PTR64 = 272
};
-} // End X86AS namespace
+} // namespace X86AS
-} // End llvm namespace
+} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 9a76abcd351bf..2c777e7f4ac38 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -54,9 +54,10 @@
using namespace llvm;
-static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
- cl::desc("Enable the machine combiner pass"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ EnableMachineCombinerPass("x86-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableTileRAPass("x86-tile-ra",
@@ -362,7 +363,7 @@ namespace {
class X86PassConfig : public TargetPassConfig {
public:
X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
X86TargetMachine &getX86TargetMachine() const {
return getTM<X86TargetMachine>();
@@ -401,10 +402,10 @@ char X86ExecutionDomainFix::ID;
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix",
- "X86 Execution Domain Fix", false, false)
+ "X86 Execution Domain Fix", false, false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefInfoWrapperPass)
INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix",
- "X86 Execution Domain Fix", false, false)
+ "X86 Execution Domain Fix", false, false)
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(*this, PM);
@@ -627,6 +628,9 @@ void X86PassConfig::addPreEmitPass2() {
// after all real instructions have been added to the epilog.
if (TT.isOSWindows() && TT.isX86_64())
addPass(createX86WinEHUnwindV2Pass());
+
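+  // Propeller-guided prefetch insertion consumes the basic-block-sections
+  // profile, so it only runs with -basic-block-sections=list.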
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
+ addPass(createPrefetchInsertionPass());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
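
Note: with the default options, each prefetch hint lowers to a RIP-relative
instruction prefetch of the generated target symbol. For a hypothetical
function foo with a hint targeting basic block 3 at offset 0, the emitted
instruction would look roughly like:

  prefetchit1 __llvm_prefetch_target_foo_3_0(%rip)

Passing -prefetch-next-address replaces the symbolic displacement with 0, and
-use-code-prefetch-instruction=false emits the prefetcht1 data prefetch
instead.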
More information about the llvm-commits
mailing list