[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 3 23:32:43 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Rahman Lavaee (rlavaee)

<details>
<summary>Changes</summary>

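This commit introduces a new pass for prefetch insertion on X86 targets. The pass uses Propeller profiles to guide prefetch placement, with the goal of optimizing memory access patterns: each profile hint names a prefetch site (basic block ID and offset within the block) and a prefetch target (function name, basic block ID, and offset).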

The new file llvm/lib/Target/X86/PrefetchInsertion.cpp implements this functionality. This commit also includes necessary modifications to related CodeGen and X86 target files to integrate the new pass.

This commit also fixes a build issue in which PrefetchInsertion.cpp was not listed in llvm/lib/Target/X86/CMakeLists.txt.

---

Patch is 93.21 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166324.diff


13 Files Affected:

- (modified) llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h (+46-3) 
- (modified) llvm/include/llvm/CodeGen/MachineBasicBlock.h (+85-85) 
- (modified) llvm/include/llvm/CodeGen/MachineInstr.h (+39-51) 
- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (modified) llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (+122-54) 
- (modified) llvm/lib/CodeGen/BasicBlockSections.cpp (+3-3) 
- (modified) llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp (+83-3) 
- (modified) llvm/lib/CodeGen/CodeGenPrepare.cpp (+33-33) 
- (modified) llvm/lib/CodeGen/MachineBasicBlock.cpp (+50-38) 
- (modified) llvm/lib/Target/X86/CMakeLists.txt (+105-118) 
- (added) llvm/lib/Target/X86/PrefetchInsertion.cpp (+209) 
- (modified) llvm/lib/Target/X86/X86.h (+8-5) 
- (modified) llvm/lib/Target/X86/X86TargetMachine.cpp (+10-6) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 48650a6df22ff..b288374a38226 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
+struct BBPosition {
+  UniqueBBID BBID;
+  unsigned BBOffset;
+};
+
+struct PrefetchHint {
+  BBPosition SitePosition;
+  StringRef TargetFunctionName;
+  BBPosition TargetPosition;
+};
+
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -50,19 +61,42 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
+  SmallVector<PrefetchHint> PrefetchHints;
+  DenseSet<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
-  // Edge counts for each edge, stored as a nested map.
+  // Edge counts for each edge.
   DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
 };
 
+// Provides DenseMapInfo for BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+  static inline BBPosition getEmptyKey() {
+    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+            DenseMapInfo<unsigned>::getEmptyKey()};
+  }
+  static inline BBPosition getTombstoneKey() {
+    return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+                      DenseMapInfo<unsigned>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const BBPosition &Val) {
+    std::pair<unsigned, unsigned> PairVal = std::make_pair(
+        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+  }
+  static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+  }
+};
+
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
   BasicBlockSectionsProfileReader(const MemoryBuffer *Buf)
-      : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'){};
+      : MBuf(Buf), LineIt(*Buf, /*SkipBlanks=*/true, /*CommentMarker=*/'#') {};
 
-  BasicBlockSectionsProfileReader(){};
+  BasicBlockSectionsProfileReader() {};
 
   // Returns true if basic block sections profile exist for function \p
   // FuncName.
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 71739278cf513..deff97416df23 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
+struct PrefetchTarget {
+  StringRef TargetFunction;
+  UniqueBBID TargetBBID;
+  unsigned TargetBBOffset;
+};
+
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -229,6 +237,8 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
+  mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
 
@@ -254,9 +264,7 @@ class MachineBasicBlock
 
   /// Remove the reference to the underlying IR BasicBlock. This is for
   /// reduction tools and should generally not be used.
-  void clearBasicBlock() {
-    BB = nullptr;
-  }
+  void clearBasicBlock() { BB = nullptr; }
 
   /// Check if there is a name of corresponding LLVM basic block.
   LLVM_ABI bool hasName() const;
@@ -348,24 +356,24 @@ class MachineBasicBlock
   LLVM_ABI bool sizeWithoutDebugLargerThan(unsigned Limit) const;
   bool empty() const { return Insts.empty(); }
 
-  MachineInstr       &instr_front()       { return Insts.front(); }
-  MachineInstr       &instr_back()        { return Insts.back();  }
+  MachineInstr &instr_front() { return Insts.front(); }
+  MachineInstr &instr_back() { return Insts.back(); }
   const MachineInstr &instr_front() const { return Insts.front(); }
-  const MachineInstr &instr_back()  const { return Insts.back();  }
-
-  MachineInstr       &front()             { return Insts.front(); }
-  MachineInstr       &back()              { return *--end();      }
-  const MachineInstr &front()       const { return Insts.front(); }
-  const MachineInstr &back()        const { return *--end();      }
-
-  instr_iterator                instr_begin()       { return Insts.begin();  }
-  const_instr_iterator          instr_begin() const { return Insts.begin();  }
-  instr_iterator                  instr_end()       { return Insts.end();    }
-  const_instr_iterator            instr_end() const { return Insts.end();    }
-  reverse_instr_iterator       instr_rbegin()       { return Insts.rbegin(); }
+  const MachineInstr &instr_back() const { return Insts.back(); }
+
+  MachineInstr &front() { return Insts.front(); }
+  MachineInstr &back() { return *--end(); }
+  const MachineInstr &front() const { return Insts.front(); }
+  const MachineInstr &back() const { return *--end(); }
+
+  instr_iterator instr_begin() { return Insts.begin(); }
+  const_instr_iterator instr_begin() const { return Insts.begin(); }
+  instr_iterator instr_end() { return Insts.end(); }
+  const_instr_iterator instr_end() const { return Insts.end(); }
+  reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
   const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
-  reverse_instr_iterator       instr_rend  ()       { return Insts.rend();   }
-  const_reverse_instr_iterator instr_rend  () const { return Insts.rend();   }
+  reverse_instr_iterator instr_rend() { return Insts.rend(); }
+  const_reverse_instr_iterator instr_rend() const { return Insts.rend(); }
 
   using instr_range = iterator_range<instr_iterator>;
   using const_instr_range = iterator_range<const_instr_iterator>;
@@ -374,10 +382,10 @@ class MachineBasicBlock
     return const_instr_range(instr_begin(), instr_end());
   }
 
-  iterator                begin()       { return instr_begin();  }
-  const_iterator          begin() const { return instr_begin();  }
-  iterator                end  ()       { return instr_end();    }
-  const_iterator          end  () const { return instr_end();    }
+  iterator begin() { return instr_begin(); }
+  const_iterator begin() const { return instr_begin(); }
+  iterator end() { return instr_end(); }
+  const_iterator end() const { return instr_end(); }
   reverse_iterator rbegin() {
     return reverse_iterator::getAtBundleBegin(instr_rbegin());
   }
@@ -424,38 +432,30 @@ class MachineBasicBlock
       SmallVectorImpl<MachineBasicBlock *>::reverse_iterator;
   using const_succ_reverse_iterator =
       SmallVectorImpl<MachineBasicBlock *>::const_reverse_iterator;
-  pred_iterator        pred_begin()       { return Predecessors.begin(); }
-  const_pred_iterator  pred_begin() const { return Predecessors.begin(); }
-  pred_iterator        pred_end()         { return Predecessors.end();   }
-  const_pred_iterator  pred_end()   const { return Predecessors.end();   }
-  pred_reverse_iterator        pred_rbegin()
-                                          { return Predecessors.rbegin();}
-  const_pred_reverse_iterator  pred_rbegin() const
-                                          { return Predecessors.rbegin();}
-  pred_reverse_iterator        pred_rend()
-                                          { return Predecessors.rend();  }
-  const_pred_reverse_iterator  pred_rend()   const
-                                          { return Predecessors.rend();  }
-  unsigned             pred_size()  const {
-    return (unsigned)Predecessors.size();
-  }
-  bool                 pred_empty() const { return Predecessors.empty(); }
-  succ_iterator        succ_begin()       { return Successors.begin();   }
-  const_succ_iterator  succ_begin() const { return Successors.begin();   }
-  succ_iterator        succ_end()         { return Successors.end();     }
-  const_succ_iterator  succ_end()   const { return Successors.end();     }
-  succ_reverse_iterator        succ_rbegin()
-                                          { return Successors.rbegin();  }
-  const_succ_reverse_iterator  succ_rbegin() const
-                                          { return Successors.rbegin();  }
-  succ_reverse_iterator        succ_rend()
-                                          { return Successors.rend();    }
-  const_succ_reverse_iterator  succ_rend()   const
-                                          { return Successors.rend();    }
-  unsigned             succ_size()  const {
-    return (unsigned)Successors.size();
-  }
-  bool                 succ_empty() const { return Successors.empty();   }
+  pred_iterator pred_begin() { return Predecessors.begin(); }
+  const_pred_iterator pred_begin() const { return Predecessors.begin(); }
+  pred_iterator pred_end() { return Predecessors.end(); }
+  const_pred_iterator pred_end() const { return Predecessors.end(); }
+  pred_reverse_iterator pred_rbegin() { return Predecessors.rbegin(); }
+  const_pred_reverse_iterator pred_rbegin() const {
+    return Predecessors.rbegin();
+  }
+  pred_reverse_iterator pred_rend() { return Predecessors.rend(); }
+  const_pred_reverse_iterator pred_rend() const { return Predecessors.rend(); }
+  unsigned pred_size() const { return (unsigned)Predecessors.size(); }
+  bool pred_empty() const { return Predecessors.empty(); }
+  succ_iterator succ_begin() { return Successors.begin(); }
+  const_succ_iterator succ_begin() const { return Successors.begin(); }
+  succ_iterator succ_end() { return Successors.end(); }
+  const_succ_iterator succ_end() const { return Successors.end(); }
+  succ_reverse_iterator succ_rbegin() { return Successors.rbegin(); }
+  const_succ_reverse_iterator succ_rbegin() const {
+    return Successors.rbegin();
+  }
+  succ_reverse_iterator succ_rend() { return Successors.rend(); }
+  const_succ_reverse_iterator succ_rend() const { return Successors.rend(); }
+  unsigned succ_size() const { return (unsigned)Successors.size(); }
+  bool succ_empty() const { return Successors.empty(); }
 
   inline iterator_range<pred_iterator> predecessors() {
     return make_range(pred_begin(), pred_end());
@@ -528,8 +528,8 @@ class MachineBasicBlock
   }
 
   LLVM_ABI livein_iterator livein_begin() const;
-  livein_iterator livein_end()   const { return LiveIns.end(); }
-  bool            livein_empty() const { return LiveIns.empty(); }
+  livein_iterator livein_end() const { return LiveIns.end(); }
+  bool livein_empty() const { return LiveIns.empty(); }
   iterator_range<livein_iterator> liveins() const {
     return make_range(livein_begin(), livein_end());
   }
@@ -581,13 +581,9 @@ class MachineBasicBlock
       return Tmp;
     }
 
-    reference operator*() const {
-      return *LiveRegI;
-    }
+    reference operator*() const { return *LiveRegI; }
 
-    pointer operator->() const {
-      return &*LiveRegI;
-    }
+    pointer operator->() const { return &*LiveRegI; }
 
     bool operator==(const liveout_iterator &RHS) const {
       if (BlockI != BlockEnd)
@@ -598,6 +594,7 @@ class MachineBasicBlock
     bool operator!=(const liveout_iterator &RHS) const {
       return !(*this == RHS);
     }
+
   private:
     bool advanceToValidPosition() {
       if (LiveRegI != (*BlockI)->livein_end())
@@ -710,6 +707,14 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
+  const SmallVector<unsigned> &getPrefetchTargets() const {
+    return PrefetchTargets;
+  }
+
+  void setPrefetchTargets(const SmallVector<unsigned> &V) {
+    PrefetchTargets = V;
+  }
+
   /// Returns the section ID of this basic block.
   MBBSectionID getSectionID() const { return SectionID; }
 
@@ -978,9 +983,7 @@ class MachineBasicBlock
 
   /// Convenience function that returns true if the block ends in a return
   /// instruction.
-  bool isReturnBlock() const {
-    return !empty() && back().isReturn();
-  }
+  bool isReturnBlock() const { return !empty() && back().isReturn(); }
 
   /// Convenience function that returns true if the bock ends in a EH scope
   /// return instruction.
@@ -1057,8 +1060,7 @@ class MachineBasicBlock
   LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M);
 
   /// Insert a range of instructions into the instruction list before I.
-  template<typename IT>
-  void insert(iterator I, IT S, IT E) {
+  template <typename IT> void insert(iterator I, IT S, IT E) {
     assert((I == end() || I->getParent() == this) &&
            "iterator points outside of basic block");
     Insts.insert(I.getInstrIterator(), S, E);
@@ -1116,17 +1118,13 @@ class MachineBasicBlock
   /// Remove an instruction or bundle from the instruction list and delete it.
   ///
   /// If I points to a bundle of instructions, they are all erased.
-  iterator erase(iterator I) {
-    return erase(I, std::next(I));
-  }
+  iterator erase(iterator I) { return erase(I, std::next(I)); }
 
   /// Remove an instruction from the instruction list and delete it.
   ///
   /// If I is the head of a bundle of instructions, the whole bundle will be
   /// erased.
-  iterator erase(MachineInstr *I) {
-    return erase(iterator(I));
-  }
+  iterator erase(MachineInstr *I) { return erase(iterator(I)); }
 
   /// Remove the unbundled instruction from the instruction list without
   /// deleting it.
@@ -1145,9 +1143,7 @@ class MachineBasicBlock
   /// bundle will still be bundled after removing the single instruction.
   LLVM_ABI MachineInstr *remove_instr(MachineInstr *I);
 
-  void clear() {
-    Insts.clear();
-  }
+  void clear() { Insts.clear(); }
 
   /// Take an instruction from MBB 'Other' at the position From, and insert it
   /// into this MBB right before 'Where'.
@@ -1164,8 +1160,8 @@ class MachineBasicBlock
   ///
   /// The instruction at 'Where' must not be included in the range of
   /// instructions to move.
-  void splice(iterator Where, MachineBasicBlock *Other,
-              iterator From, iterator To) {
+  void splice(iterator Where, MachineBasicBlock *Other, iterator From,
+              iterator To) {
     Insts.splice(Where.getInstrIterator(), Other->Insts,
                  From.getInstrIterator(), To.getInstrIterator());
   }
@@ -1251,7 +1247,7 @@ class MachineBasicBlock
                       bool IsStandalone = true) const;
 
   enum PrintNameFlag {
-    PrintNameIr = (1 << 0), ///< Add IR name where available
+    PrintNameIr = (1 << 0),         ///< Add IR name where available
     PrintNameAttributes = (1 << 1), ///< Print attributes
   };
 
@@ -1275,6 +1271,12 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
+  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+  const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
+    return CallInstSymbols;
+  }
+
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
@@ -1282,9 +1284,7 @@ class MachineBasicBlock
     return IrrLoopHeaderWeight;
   }
 
-  void setIrrLoopHeaderWeight(uint64_t Weight) {
-    IrrLoopHeaderWeight = Weight;
-  }
+  void setIrrLoopHeaderWeight(uint64_t Weight) { IrrLoopHeaderWeight = Weight; }
 
   /// Return probability of the edge from this block to MBB. This method should
   /// NOT be called directly, but by using getEdgeProbability method from
@@ -1393,7 +1393,7 @@ static_assert(GraphHasNodeNumbers<const MachineBasicBlock *>,
 // to be when traversing the predecessor edges of a MBB
 // instead of the successor edges.
 //
-template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<MachineBasicBlock *>> {
   using NodeRef = MachineBasicBlock *;
   using ChildIteratorType = MachineBasicBlock::pred_iterator;
 
@@ -1413,7 +1413,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*>> {
 static_assert(GraphHasNodeNumbers<Inverse<MachineBasicBlock *>>,
               "GraphTraits getNumber() not detected");
 
-template <> struct GraphTraits<Inverse<const MachineBasicBlock*>> {
+template <> struct GraphTraits<Inverse<const MachineBasicBlock *>> {
   using NodeRef = const MachineBasicBlock *;
   using ChildIteratorType = MachineBasicBlock::const_pred_iterator;
 
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..ab9fe82bc7917 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -78,9 +78,9 @@ class MachineInstr
   /// otherwise easily derivable from the IR text.
   ///
   enum CommentFlag {
-    ReloadReuse = 0x1,    // higher bits are reserved for target dep comments.
+    ReloadReuse = 0x1, // higher bits are reserved for target dep comments.
     NoSchedComment = 0x2,
-    TAsmComments = 0x4    // Target Asm comments should start from this value.
+    TAsmComments = 0x4 // Target Asm comments should start from this value.
   };
 
   enum MIFlag {
@@ -123,16 +123,17 @@ class MachineInstr
     NoUSWrap = 1 << 20,      // Instruction supports geps
                              // no unsigned signed wrap.
     SameSign = 1 << 21,      // Both operands have the same sign.
-    InBounds = 1 << 22       // Pointer arithmetic remains inbounds.
+    InBounds = 1 << 22,      // Pointer arithmetic remains inbounds.
                              // Implies NoUSWrap.
+    Prefetch = 1 << 23,      // Instruction is a prefetch.
   };
 
 private:
-  const MCInstrDesc *MCID;              // Instruction descriptor.
-  MachineBasicBlock *Parent = nullptr;  // Pointer to the owning basic block.
+  const MCInstrDesc *MCID;             // Instruction descriptor.
+  MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block.
 
   // Operands are allocated by an ArrayRecycler.
-  MachineOperand *Operands = nullptr;   // Pointer to the first operand.
+  MachineOperand *Operands = nullptr; // Pointer to the first operand.
 
 #define LLVM_MI_NUMOPERANDS_BITS 24
 #define LLVM_MI_FLAGS_BITS 24
@@ -144,7 +145,7 @@ class MachineInstr
   // OperandCapacity has uint8_t size, so it should be next to NumOperands
   // to properly pack.
   using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
-  OperandCapacity CapOperands;          // Capacity of the Operands array.
+  OperandCapacity CapOperands; // Capacity of the Operands array.
 
   /// Various bits of additional information about the machine instruction.
   uint32_t Flags : LLVM_MI_FLAGS_BITS;
@@ -226,9 +227,8 @@ class MachineInstr
     }
 
     MDNode *getPCSections() const {
-      return HasPCSections
-                 ? g...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/166324


More information about the llvm-commits mailing list