[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 23:32:43 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Rahman Lavaee (rlavaee)
<details>
<summary>Changes</summary>
This commit introduces a new pass for prefetch insertion on X86 targets. The pass utilizes Propeller profiles to guide prefetch placement, optimizing memory access patterns.
The new file llvm/lib/Target/X86/PrefetchInsertion.cpp implements this functionality. This commit also includes necessary modifications to related CodeGen and X86 target files to integrate the new pass.
It also fixes a build issue in which PrefetchInsertion.cpp was missing from llvm/lib/Target/X86/CMakeLists.txt.
---
Patch is 93.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166324.diff
13 Files Affected:
- (modified) llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h (+46-3)
- (modified) llvm/include/llvm/CodeGen/MachineBasicBlock.h (+85-85)
- (modified) llvm/include/llvm/CodeGen/MachineInstr.h (+39-51)
- (modified) llvm/include/llvm/InitializePasses.h (+1)
- (modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+122-54)
- (modified) llvm/lib/CodeGen/BasicBlockSections.cpp (+3-3)
- (modified) llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (+83-3)
- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+33-33)
- (modified) llvm/lib/CodeGen/MachineBasicBlock.cpp (+50-38)
- (modified) llvm/lib/Target/X86/CMakeLists.txt (+105-118)
- (added) llvm/lib/Target/X86/PrefetchInsertion.cpp (+209)
- (modified) llvm/lib/Target/X86/X86.h (+8-5)
- (modified) llvm/lib/Target/X86/X86TargetMachine.cpp (+10-6)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 48650a6df22ff..b288374a38226 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
+struct BBPosition {
+ UniqueBBID BBID;
+ unsigned BBOffset;
+};
+
+struct PrefetchHint {
+ BBPosition SitePosition;
+ StringRef TargetFunctionName;
+ BBPosition TargetPosition;
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -50,19 +61,42 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
+ SmallVector<PrefetchHint> PrefetchHints;
+ DenseSet<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
- // Edge counts for each edge, stored as a nested map.
+ // Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
};
+// Provides DenseMapInfo for BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+ static inline BBPosition getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline BBPosition getTombstoneKey() {
+ return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const BBPosition &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
- : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){};
+ : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') {};
- BasicBlockSectionsProfileReader(){};
+ BasicBlockSectionsProfileReader() {};
// Returns true if basic block sections profile exist for function \p
// FuncName.
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 71739278cf513..deff97416df23 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -229,6 +237,8 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -254,9 +264,7 @@ class MachineBasicBlock
/// Remove the reference to the underlying IR BasicBlock. This is for
/// reduction tools and should generally not be used.
- void clearBasicBlock() {
- BB = nullptr;
- }
+ void clearBasicBlock() { BB = nullptr; }
/// Check if there is a name of corresponding LLVM basic block.
LLVM_ABI bool hasName() const;
@@ -348,24 +356,24 @@ class MachineBasicBlock
LLVM_ABI bool sizeWithoutDebugLargerThan(unsigned Limit) const;
bool empty() const { return Insts.empty(); }
- MachineInstr &instr_front() { return Insts.front(); }
- MachineInstr &instr_back() { return Insts.back(); }
+ MachineInstr &instr_front() { return Insts.front(); }
+ MachineInstr &instr_back() { return Insts.back(); }
const MachineInstr &instr_front() const { return Insts.front(); }
- const MachineInstr &instr_back() const { return Insts.back(); }
-
- MachineInstr &front() { return Insts.front(); }
- MachineInstr &back() { return *--end(); }
- const MachineInstr &front() const { return Insts.front(); }
- const MachineInstr &back() const { return *--end(); }
-
- instr_iterator instr_begin() { return Insts.begin(); }
- const_instr_iterator instr_begin() const { return Insts.begin(); }
- instr_iterator instr_end() { return Insts.end(); }
- const_instr_iterator instr_end() const { return Insts.end(); }
- reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
+ const MachineInstr &instr_back() const { return Insts.back(); }
+
+ MachineInstr &front() { return Insts.front(); }
+ MachineInstr &back() { return *--end(); }
+ const MachineInstr &front() const { return Insts.front(); }
+ const MachineInstr &back() const { return *--end(); }
+
+ instr_iterator instr_begin() { return Insts.begin(); }
+ const_instr_iterator instr_begin() const { return Insts.begin(); }
+ instr_iterator instr_end() { return Insts.end(); }
+ const_instr_iterator instr_end() const { return Insts.end(); }
+ reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
- reverse_instr_iterator instr_rend () { return Insts.rend(); }
- const_reverse_instr_iterator instr_rend () const { return Insts.rend(); }
+ reverse_instr_iterator instr_rend() { return Insts.rend(); }
+ const_reverse_instr_iterator instr_rend() const { return Insts.rend(); }
using instr_range = iterator_range<instr_iterator>;
using const_instr_range = iterator_range<const_instr_iterator>;
@@ -374,10 +382,10 @@ class MachineBasicBlock
return const_instr_range(instr_begin(), instr_end());
}
- iterator begin() { return instr_begin(); }
- const_iterator begin() const { return instr_begin(); }
- iterator end () { return instr_end(); }
- const_iterator end () const { return instr_end(); }
+ iterator begin() { return instr_begin(); }
+ const_iterator begin() const { return instr_begin(); }
+ iterator end() { return instr_end(); }
+ const_iterator end() const { return instr_end(); }
reverse_iterator rbegin() {
return reverse_iterator::getAtBundleBegin(instr_rbegin());
}
@@ -424,38 +432,30 @@ class MachineBasicBlock
SmallVectorImpl<MachineBasicBlock *>::reverse_iterator;
using const_succ_reverse_iterator =
SmallVectorImpl<MachineBasicBlock *>::const_reverse_iterator;
- pred_iterator pred_begin() { return Predecessors.begin(); }
- const_pred_iterator pred_begin() const { return Predecessors.begin(); }
- pred_iterator pred_end() { return Predecessors.end(); }
- const_pred_iterator pred_end() const { return Predecessors.end(); }
- pred_reverse_iterator pred_rbegin()
- { return Predecessors.rbegin();}
- const_pred_reverse_iterator pred_rbegin() const
- { return Predecessors.rbegin();}
- pred_reverse_iterator pred_rend()
- { return Predecessors.rend(); }
- const_pred_reverse_iterator pred_rend() const
- { return Predecessors.rend(); }
- unsigned pred_size() const {
- return (unsigned)Predecessors.size();
- }
- bool pred_empty() const { return Predecessors.empty(); }
- succ_iterator succ_begin() { return Successors.begin(); }
- const_succ_iterator succ_begin() const { return Successors.begin(); }
- succ_iterator succ_end() { return Successors.end(); }
- const_succ_iterator succ_end() const { return Successors.end(); }
- succ_reverse_iterator succ_rbegin()
- { return Successors.rbegin(); }
- const_succ_reverse_iterator succ_rbegin() const
- { return Successors.rbegin(); }
- succ_reverse_iterator succ_rend()
- { return Successors.rend(); }
- const_succ_reverse_iterator succ_rend() const
- { return Successors.rend(); }
- unsigned succ_size() const {
- return (unsigned)Successors.size();
- }
- bool succ_empty() const { return Successors.empty(); }
+ pred_iterator pred_begin() { return Predecessors.begin(); }
+ const_pred_iterator pred_begin() const { return Predecessors.begin(); }
+ pred_iterator pred_end() { return Predecessors.end(); }
+ const_pred_iterator pred_end() const { return Predecessors.end(); }
+ pred_reverse_iterator pred_rbegin() { return Predecessors.rbegin(); }
+ const_pred_reverse_iterator pred_rbegin() const {
+ return Predecessors.rbegin();
+ }
+ pred_reverse_iterator pred_rend() { return Predecessors.rend(); }
+ const_pred_reverse_iterator pred_rend() const { return Predecessors.rend(); }
+ unsigned pred_size() const { return (unsigned)Predecessors.size(); }
+ bool pred_empty() const { return Predecessors.empty(); }
+ succ_iterator succ_begin() { return Successors.begin(); }
+ const_succ_iterator succ_begin() const { return Successors.begin(); }
+ succ_iterator succ_end() { return Successors.end(); }
+ const_succ_iterator succ_end() const { return Successors.end(); }
+ succ_reverse_iterator succ_rbegin() { return Successors.rbegin(); }
+ const_succ_reverse_iterator succ_rbegin() const {
+ return Successors.rbegin();
+ }
+ succ_reverse_iterator succ_rend() { return Successors.rend(); }
+ const_succ_reverse_iterator succ_rend() const { return Successors.rend(); }
+ unsigned succ_size() const { return (unsigned)Successors.size(); }
+ bool succ_empty() const { return Successors.empty(); }
inline iterator_range<pred_iterator> predecessors() {
return make_range(pred_begin(), pred_end());
@@ -528,8 +528,8 @@ class MachineBasicBlock
}
LLVM_ABI livein_iterator livein_begin() const;
- livein_iterator livein_end() const { return LiveIns.end(); }
- bool livein_empty() const { return LiveIns.empty(); }
+ livein_iterator livein_end() const { return LiveIns.end(); }
+ bool livein_empty() const { return LiveIns.empty(); }
iterator_range<livein_iterator> liveins() const {
return make_range(livein_begin(), livein_end());
}
@@ -581,13 +581,9 @@ class MachineBasicBlock
return Tmp;
}
- reference operator*() const {
- return *LiveRegI;
- }
+ reference operator*() const { return *LiveRegI; }
- pointer operator->() const {
- return &*LiveRegI;
- }
+ pointer operator->() const { return &*LiveRegI; }
bool operator==(const liveout_iterator &RHS) const {
if (BlockI != BlockEnd)
@@ -598,6 +594,7 @@ class MachineBasicBlock
bool operator!=(const liveout_iterator &RHS) const {
return !(*this == RHS);
}
+
private:
bool advanceToValidPosition() {
if (LiveRegI != (*BlockI)->livein_end())
@@ -710,6 +707,14 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
+ const SmallVector<unsigned> &getPrefetchTargets() const {
+ return PrefetchTargets;
+ }
+
+ void setPrefetchTargets(const SmallVector<unsigned> &V) {
+ PrefetchTargets = V;
+ }
+
/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }
@@ -978,9 +983,7 @@ class MachineBasicBlock
/// Convenience function that returns true if the block ends in a return
/// instruction.
- bool isReturnBlock() const {
- return !empty() && back().isReturn();
- }
+ bool isReturnBlock() const { return !empty() && back().isReturn(); }
/// Convenience function that returns true if the bock ends in a EH scope
/// return instruction.
@@ -1057,8 +1060,7 @@ class MachineBasicBlock
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M);
/// Insert a range of instructions into the instruction list before I.
- template<typename IT>
- void insert(iterator I, IT S, IT E) {
+ template <typename IT> void insert(iterator I, IT S, IT E) {
assert((I == end() || I->getParent() == this) &&
"iterator points outside of basic block");
Insts.insert(I.getInstrIterator(), S, E);
@@ -1116,17 +1118,13 @@ class MachineBasicBlock
/// Remove an instruction or bundle from the instruction list and delete it.
///
/// If I points to a bundle of instructions, they are all erased.
- iterator erase(iterator I) {
- return erase(I, std::next(I));
- }
+ iterator erase(iterator I) { return erase(I, std::next(I)); }
/// Remove an instruction from the instruction list and delete it.
///
/// If I is the head of a bundle of instructions, the whole bundle will be
/// erased.
- iterator erase(MachineInstr *I) {
- return erase(iterator(I));
- }
+ iterator erase(MachineInstr *I) { return erase(iterator(I)); }
/// Remove the unbundled instruction from the instruction list without
/// deleting it.
@@ -1145,9 +1143,7 @@ class MachineBasicBlock
/// bundle will still be bundled after removing the single instruction.
LLVM_ABI MachineInstr *remove_instr(MachineInstr *I);
- void clear() {
- Insts.clear();
- }
+ void clear() { Insts.clear(); }
/// Take an instruction from MBB 'Other' at the position From, and insert it
/// into this MBB right before 'Where'.
@@ -1164,8 +1160,8 @@ class MachineBasicBlock
///
/// The instruction at 'Where' must not be included in the range of
/// instructions to move.
- void splice(iterator Where, MachineBasicBlock *Other,
- iterator From, iterator To) {
+ void splice(iterator Where, MachineBasicBlock *Other, iterator From,
+ iterator To) {
Insts.splice(Where.getInstrIterator(), Other->Insts,
From.getInstrIterator(), To.getInstrIterator());
}
@@ -1251,7 +1247,7 @@ class MachineBasicBlock
bool IsStandalone = true) const;
enum PrintNameFlag {
- PrintNameIr = (1 << 0), ///< Add IR name where available
+ PrintNameIr = (1 << 0), ///< Add IR name where available
PrintNameAttributes = (1 << 1), ///< Print attributes
};
@@ -1275,6 +1271,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
@@ -1282,9 +1284,7 @@ class MachineBasicBlock
return IrrLoopHeaderWeight;
}
- void setIrrLoopHeaderWeight(uint64_t Weight) {
- IrrLoopHeaderWeight = Weight;
- }
+ void setIrrLoopHeaderWeight(uint64_t Weight) { IrrLoopHeaderWeight = Weight; }
/// Return probability of the edge from this block to MBB. This method should
/// NOT be called directly, but by using getEdgeProbability method from
@@ -1393,7 +1393,7 @@ static_assert(GraphHasNodeNumbers<const MachineBasicBlock *>,
// to be when traversing the predecessor edges of a MBB
// instead of the successor edges.
//
-template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<MachineBasicBlock *>> {
using NodeRef = MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::pred_iterator;
@@ -1413,7 +1413,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
static_assert(GraphHasNodeNumbers<Inverse<MachineBasicBlock *>>,
"GraphTraits getNumber() not detected");
-template <> struct GraphTraits<Inverse<const MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<const MachineBasicBlock *>> {
using NodeRef = const MachineBasicBlock *;
using ChildIteratorType = MachineBasicBlock::const_pred_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..ab9fe82bc7917 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -78,9 +78,9 @@ class MachineInstr
/// otherwise easily derivable from the IR text.
///
enum CommentFlag {
- ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
+ ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
NoSchedComment = 0x2,
- TAsmComments = 0x4 // Target Asm comments should start from this value.
+ TAsmComments = 0x4 // Target Asm comments should start from this value.
};
enum MIFlag {
@@ -123,16 +123,17 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
- InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
+ InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
+ Prefetch = 1 << 23, // Instruction is a prefetch.
};
private:
- const MCInstrDesc *MCID; // Instruction descriptor.
- MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
+ const MCInstrDesc *MCID; // Instruction descriptor.
+ MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
// Operands are allocated by an ArrayRecycler.
- MachineOperand *Operands = nullptr; // Pointer to the first operand.
+ MachineOperand *Operands = nullptr; // Pointer to the first operand.
#define LLVM_MI_NUMOPERANDS_BITS 24
#define LLVM_MI_FLAGS_BITS 24
@@ -144,7 +145,7 @@ class MachineInstr
// OperandCapacity has uint8_t size, so it should be next to NumOperands
// to properly pack.
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
- OperandCapacity CapOperands; // Capacity of the Operands array.
+ OperandCapacity CapOperands; // Capacity of the Operands array.
/// Various bits of additional information about the machine instruction.
uint32_t Flags : LLVM_MI_FLAGS_BITS;
@@ -226,9 +227,8 @@ class MachineInstr
}
MDNode *getPCSections() const {
- return HasPCSections
- ? g...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/166324
More information about the llvm-commits mailing list