[llvm] Adding Matching and Inference Functionality to Propeller-PR4: Implement matching and inference and create clusters (PR #165868)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 31 08:20:48 PDT 2025


https://github.com/wdx727 updated https://github.com/llvm/llvm-project/pull/165868

>From 813b3621a12f90f59ebcedd1fc48978474e4a4d3 Mon Sep 17 00:00:00 2001
From: wudexin <wudexin at kuaishou.com>
Date: Wed, 24 Sep 2025 18:42:21 +0800
Subject: [PATCH] Adding Matching and Inference Functionality to Propeller-PR4:
 Implement matching and inference and create clusters.

Co-authored-by: lifengxiang1025 <lifengxiang at kuaishou.com>
Co-authored-by: zcfh <wuminghui03 at kuaishou.com>
---
 .../CodeGen/BasicBlockMatchingAndInference.h  |  62 ++++++
 .../CodeGen/BasicBlockSectionsProfileReader.h |   7 +
 .../llvm/CodeGen/MachineBlockHashInfo.h       |   2 +
 llvm/include/llvm/CodeGen/Passes.h            |   4 +
 llvm/include/llvm/InitializePasses.h          |   1 +
 .../Transforms/Utils/SampleProfileInference.h |  16 ++
 .../BasicBlockMatchingAndInference.cpp        | 187 ++++++++++++++++++
 llvm/lib/CodeGen/BasicBlockSections.cpp       |  88 ++++++++-
 .../BasicBlockSectionsProfileReader.cpp       |  15 ++
 llvm/lib/CodeGen/CMakeLists.txt               |   1 +
 llvm/lib/CodeGen/TargetPassConfig.cpp         |  13 +-
 .../Utils/SampleProfileInference.cpp          |   2 -
 .../basic-block-sections-clusters-bb-hash.ll  |  99 ++++++++++
 13 files changed, 488 insertions(+), 9 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h
 create mode 100644 llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp
 create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll

diff --git a/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h
new file mode 100644
index 0000000000000..6e9bbb969a445
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BasicBlockMatchingAndInference.h
@@ -0,0 +1,62 @@
+//===- llvm/CodeGen/BasicBlockMatchingAndInference.h ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Infer weights for all basic blocks using matching and inference.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H
+#define LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H
+
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+
+namespace llvm {
+
+class BasicBlockMatchingAndInference : public MachineFunctionPass {
+private:
+  using Edge = std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
+  using BlockWeightMap = DenseMap<const MachineBasicBlock *, uint64_t>;
+  using EdgeWeightMap = DenseMap<Edge, uint64_t>;
+  using BlockEdgeMap = DenseMap<const MachineBasicBlock *,
+                                SmallVector<const MachineBasicBlock *, 8>>;
+
+  struct WeightInfo {
+    // Weight of basic blocks.
+    BlockWeightMap BlockWeights;
+    // Weight of edges.
+    EdgeWeightMap EdgeWeights;
+  };
+
+public:
+  static char ID;
+  BasicBlockMatchingAndInference();
+
+  StringRef getPassName() const override {
+    return "Basic Block Matching and Inference";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  std::optional<WeightInfo> getWeightInfo(StringRef FuncName) const;
+
+private:
+  StringMap<WeightInfo> ProgramWeightInfo;
+
+  WeightInfo initWeightInfoByMatching(MachineFunction &MF);
+
+  void generateWeightInfoByInference(MachineFunction &MF,
+                                     WeightInfo &MatchWeight);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BASIC_BLOCK_AND_INFERENCE_H
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 823753021ff74..e8422e22aca0e 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -90,6 +90,10 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
+  // Return the complete function path and cluster info for the given function.
+  std::pair<bool, FunctionPathAndClusterInfo>
+  getFunctionPathAndClusterInfo(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -199,6 +203,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
 
+  std::pair<bool, FunctionPathAndClusterInfo>
+  getFunctionPathAndClusterInfo(StringRef FuncName) const;
+
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
   bool doInitialization(Module &M) override;
diff --git a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h
index d044d5f940b75..6f26819d566ae 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockHashInfo.h
@@ -80,6 +80,8 @@ struct BlendedBlockHash {
     return Dist;
   }
 
+  uint16_t getOpcodeHash() const { return OpcodeHash; }
+
 private:
   /// The offset of the basic block from the function start.
   uint16_t Offset{0};
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..2bf83cfa655b6 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,10 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
 
 LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 
+/// createBasicBlockMatchingAndInferencePass - This pass enables matching
+/// and inference when using propeller.
+LLVM_ABI MachineFunctionPass *createBasicBlockMatchingAndInferencePass();
+
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
 LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 581b4ad161daa..9360550875219 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -55,6 +55,7 @@ LLVM_ABI void initializeAlwaysInlinerLegacyPassPass(PassRegistry &);
 LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
 LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
 LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
+LLVM_ABI void initializeBasicBlockMatchingAndInferencePass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
 LLVM_ABI void
 initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 7231e45fe8eb7..e1663d29c1e3c 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -130,6 +130,11 @@ template <typename FT> class SampleProfileInference {
   SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
                          BlockWeightMap &SampleBlockWeights)
       : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}
+  SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
+                         BlockWeightMap &SampleBlockWeights,
+                         EdgeWeightMap &SampleEdgeWeights)
+      : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights),
+        SampleEdgeWeights(SampleEdgeWeights) {}
 
   /// Apply the profile inference algorithm for a given function
   void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
@@ -157,6 +162,9 @@ template <typename FT> class SampleProfileInference {
 
   /// Map basic blocks to their sampled weights.
   BlockWeightMap &SampleBlockWeights;
+
+  /// Map edges to their sampled weights.
+  EdgeWeightMap SampleEdgeWeights;
 };
 
 template <typename BT>
@@ -266,6 +274,14 @@ FlowFunction SampleProfileInference<BT>::createFlowFunction(
       FlowJump Jump;
       Jump.Source = BlockIndex[BB];
       Jump.Target = BlockIndex[Succ];
+      auto It = SampleEdgeWeights.find(std::make_pair(BB, Succ));
+      if (It != SampleEdgeWeights.end()) {
+        Jump.HasUnknownWeight = false;
+        Jump.Weight = It->second;
+      } else {
+        Jump.HasUnknownWeight = true;
+        Jump.Weight = 0;
+      }
       Func.Jumps.push_back(Jump);
     }
   }
diff --git a/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp
new file mode 100644
index 0000000000000..e0335f6a0b79f
--- /dev/null
+++ b/llvm/lib/CodeGen/BasicBlockMatchingAndInference.cpp
@@ -0,0 +1,187 @@
+//===- llvm/CodeGen/BasicBlockMatchingAndInference.cpp ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Infer weights for all basic blocks using matching and inference.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BasicBlockMatchingAndInference.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBlockHashInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include <llvm/Support/CommandLine.h>
+
+using namespace llvm;
+
+static cl::opt<float>
+    PropellerInferThreshold("propeller-infer-threshold",
+                            cl::desc("Threshold for infer stale profile"),
+                            cl::init(0.6), cl::Optional);
+
+/// The object is used to identify and match basic blocks given their hashes.
+class StaleMatcher {
+public:
+  /// Initialize stale matcher.
+  void init(const std::vector<MachineBasicBlock *> &Blocks,
+            const std::vector<BlendedBlockHash> &Hashes) {
+    assert(Blocks.size() == Hashes.size() &&
+           "incorrect matcher initialization");
+    for (size_t I = 0; I < Blocks.size(); I++) {
+      MachineBasicBlock *Block = Blocks[I];
+      uint16_t OpHash = Hashes[I].getOpcodeHash();
+      OpHashToBlocks[OpHash].push_back(std::make_pair(Hashes[I], Block));
+    }
+  }
+
+  /// Find the most similar block for a given hash.
+  MachineBasicBlock *matchBlock(BlendedBlockHash BlendedHash) const {
+    auto BlockIt = OpHashToBlocks.find(BlendedHash.getOpcodeHash());
+    if (BlockIt == OpHashToBlocks.end()) {
+      return nullptr;
+    }
+    MachineBasicBlock *BestBlock = nullptr;
+    uint64_t BestDist = std::numeric_limits<uint64_t>::max();
+    for (auto It : BlockIt->second) {
+      MachineBasicBlock *Block = It.second;
+      BlendedBlockHash Hash = It.first;
+      uint64_t Dist = Hash.distance(BlendedHash);
+      if (BestBlock == nullptr || Dist < BestDist) {
+        BestDist = Dist;
+        BestBlock = Block;
+      }
+    }
+    return BestBlock;
+  }
+
+private:
+  using HashBlockPairType = std::pair<BlendedBlockHash, MachineBasicBlock *>;
+  std::unordered_map<uint16_t, std::vector<HashBlockPairType>> OpHashToBlocks;
+};
+
+INITIALIZE_PASS_BEGIN(BasicBlockMatchingAndInference,
+                      "machine-block-match-infer",
+                      "Machine Block Matching and Inference Analysis", true,
+                      true)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockHashInfo)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(BasicBlockMatchingAndInference, "machine-block-match-infer",
+                    "Machine Block Matching and Inference Analysis", true, true)
+
+char BasicBlockMatchingAndInference::ID = 0;
+
+BasicBlockMatchingAndInference::BasicBlockMatchingAndInference()
+    : MachineFunctionPass(ID) {
+  initializeBasicBlockMatchingAndInferencePass(
+      *PassRegistry::getPassRegistry());
+}
+
+void BasicBlockMatchingAndInference::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBlockHashInfo>();
+  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+std::optional<BasicBlockMatchingAndInference::WeightInfo>
+BasicBlockMatchingAndInference::getWeightInfo(StringRef FuncName) const {
+  auto It = ProgramWeightInfo.find(FuncName);
+  if (It == ProgramWeightInfo.end()) {
+    return std::nullopt;
+  }
+  return It->second;
+}
+
+BasicBlockMatchingAndInference::WeightInfo
+BasicBlockMatchingAndInference::initWeightInfoByMatching(MachineFunction &MF) {
+  std::vector<MachineBasicBlock *> Blocks;
+  std::vector<BlendedBlockHash> Hashes;
+  auto BSPR = &getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
+  auto MBHI = &getAnalysis<MachineBlockHashInfo>();
+  for (auto &Block : MF) {
+    Blocks.push_back(&Block);
+    Hashes.push_back(BlendedBlockHash(MBHI->getMBBHash(Block)));
+  }
+  StaleMatcher Matcher;
+  Matcher.init(Blocks, Hashes);
+  BasicBlockMatchingAndInference::WeightInfo MatchWeight;
+  auto [Flag, PathAndClusterInfo] =
+      BSPR->getFunctionPathAndClusterInfo(MF.getName());
+  if (!Flag)
+    return MatchWeight;
+  for (auto &BlockCount : PathAndClusterInfo.NodeCounts) {
+    if (PathAndClusterInfo.BBHashes.count(BlockCount.first.BaseID)) {
+      auto Hash = PathAndClusterInfo.BBHashes[BlockCount.first.BaseID];
+      MachineBasicBlock *Block = Matcher.matchBlock(BlendedBlockHash(Hash));
+      // When a basic block has clone copies, sum their counts.
+      if (Block != nullptr)
+        MatchWeight.BlockWeights[Block] += BlockCount.second;
+    }
+  }
+  for (auto &PredItem : PathAndClusterInfo.EdgeCounts) {
+    auto PredID = PredItem.first.BaseID;
+    if (!PathAndClusterInfo.BBHashes.count(PredID))
+      continue;
+    auto PredHash = PathAndClusterInfo.BBHashes[PredID];
+    MachineBasicBlock *PredBlock =
+        Matcher.matchBlock(BlendedBlockHash(PredHash));
+    if (PredBlock == nullptr)
+      continue;
+    for (auto &SuccItem : PredItem.second) {
+      auto SuccID = SuccItem.first.BaseID;
+      auto EdgeWeight = SuccItem.second;
+      if (PathAndClusterInfo.BBHashes.count(SuccID)) {
+        auto SuccHash = PathAndClusterInfo.BBHashes[SuccID];
+        MachineBasicBlock *SuccBlock =
+            Matcher.matchBlock(BlendedBlockHash(SuccHash));
+        // When an edge has clone copies, sum their counts.
+        if (SuccBlock != nullptr)
+          MatchWeight.EdgeWeights[std::make_pair(PredBlock, SuccBlock)] +=
+              EdgeWeight;
+      }
+    }
+  }
+  return MatchWeight;
+}
+
+void BasicBlockMatchingAndInference::generateWeightInfoByInference(
+    MachineFunction &MF,
+    BasicBlockMatchingAndInference::WeightInfo &MatchWeight) {
+  BlockEdgeMap Successors;
+  for (auto &Block : MF) {
+    for (auto *Succ : Block.successors())
+      Successors[&Block].push_back(Succ);
+  }
+  SampleProfileInference<MachineFunction> SPI(
+      MF, Successors, MatchWeight.BlockWeights, MatchWeight.EdgeWeights);
+  BlockWeightMap BlockWeights;
+  EdgeWeightMap EdgeWeights;
+  SPI.apply(BlockWeights, EdgeWeights);
+  ProgramWeightInfo.try_emplace(
+      MF.getName(), BasicBlockMatchingAndInference::WeightInfo{
+                        std::move(BlockWeights), std::move(EdgeWeights)});
+}
+
+bool BasicBlockMatchingAndInference::runOnMachineFunction(MachineFunction &MF) {
+  if (MF.empty())
+    return false;
+  auto MatchWeight = initWeightInfoByMatching(MF);
+  // If the ratio of the number of MBBs in matching to the total number of MBBs
+  // in the function is less than the threshold value, the processing should be
+  // abandoned.
+  if (static_cast<float>(MatchWeight.BlockWeights.size()) / MF.size() <
+      PropellerInferThreshold) {
+    return false;
+  }
+  generateWeightInfoByInference(MF, MatchWeight);
+  return false;
+}
+
+MachineFunctionPass *llvm::createBasicBlockMatchingAndInferencePass() {
+  return new BasicBlockMatchingAndInference();
+}
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index e317e1c06741f..a86ac6d6eab23 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -70,6 +70,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockMatchingAndInference.h"
 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -81,6 +82,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/UniqueBBID.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/CodeLayout.h"
 #include <optional>
 
 using namespace llvm;
@@ -175,6 +177,77 @@ updateBranches(MachineFunction &MF,
   }
 }
 
+// This function generates the machine basic block clusters of "hot" blocks.
+// Currently, only support one cluster creation.
+// TODO: Support multi-cluster creation and path cloning.
+static std::pair<bool, SmallVector<BBClusterInfo>>
+createBBClusterInfoForFunction(const MachineFunction &MF,
+                               BasicBlockMatchingAndInference *BMI) {
+  unsigned CurrentCluster = 0;
+  auto OptWeightInfo = BMI->getWeightInfo(MF.getName());
+  if (!OptWeightInfo)
+    return std::pair(false, SmallVector<BBClusterInfo>{});
+  auto BlockWeights = OptWeightInfo->BlockWeights;
+  auto EdgeWeights = OptWeightInfo->EdgeWeights;
+
+  SmallVector<const MachineBasicBlock *, 4> HotMBBs;
+  if (MF.size() <= 2) {
+    for (auto &MBB : MF) {
+      if (MBB.isEntryBlock() || BlockWeights[&MBB] > 0) {
+        HotMBBs.push_back(&MBB);
+      }
+    }
+  } else {
+    SmallVector<uint64_t, 0> BlockSizes(MF.size());
+    SmallVector<uint64_t, 0> BlockCounts(MF.size());
+    std::vector<const MachineBasicBlock *> OrigOrder;
+    OrigOrder.reserve(MF.size());
+    SmallVector<codelayout::EdgeCount, 0> JumpCounts;
+
+    // Init the MBB size and count.
+    for (auto &MBB : MF) {
+      auto NonDbgInsts =
+          instructionsWithoutDebug(MBB.instr_begin(), MBB.instr_end());
+      int NumInsts = std::distance(NonDbgInsts.begin(), NonDbgInsts.end());
+      BlockSizes[MBB.getNumber()] = 4 * NumInsts;
+      BlockCounts[MBB.getNumber()] = BlockWeights[&MBB];
+      OrigOrder.push_back(&MBB);
+    }
+
+    // Init the edge count.
+    for (auto &MBB : MF) {
+      for (auto *Succ : MBB.successors()) {
+        auto EdgeWeight = EdgeWeights[std::make_pair(&MBB, Succ)];
+        JumpCounts.push_back({static_cast<uint64_t>(MBB.getNumber()),
+                              static_cast<uint64_t>(Succ->getNumber()),
+                              EdgeWeight});
+      }
+    }
+
+    // Run the layout algorithm.
+    auto Result = computeExtTspLayout(BlockSizes, BlockCounts, JumpCounts);
+    for (uint64_t R : Result) {
+      auto Block = OrigOrder[R];
+      if (Block->isEntryBlock() || BlockWeights[Block] > 0)
+        HotMBBs.push_back(Block);
+    }
+  }
+
+  // Generate the "hot" basic block cluster.
+  if (!HotMBBs.empty()) {
+    SmallVector<BBClusterInfo, 4> BBClusterInfos;
+    unsigned CurrentPosition = 0;
+    for (auto &MBB : HotMBBs) {
+      if (MBB->getBBID()) {
+        BBClusterInfos.push_back(
+            {*(MBB->getBBID()), CurrentCluster, CurrentPosition++});
+      }
+    }
+    return std::pair(true, std::move(BBClusterInfos));
+  }
+  return std::pair(false, SmallVector<BBClusterInfo>{});
+}
+
 // This function sorts basic blocks according to the cluster's information.
 // All explicitly specified clusters of basic blocks will be ordered
 // accordingly. All non-specified BBs go into a separate "Cold" section.
@@ -314,12 +387,16 @@ bool BasicBlockSections::handleBBSections(MachineFunction &MF) {
 
   DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo;
   if (BBSectionsType == BasicBlockSection::List) {
-    auto [HasProfile, ClusterInfo] =
-        getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-            .getClusterInfoForFunction(MF.getName());
-    if (!HasProfile)
+    std::pair<bool, SmallVector<BBClusterInfo>> ExpClusterInfo;
+    if (auto *BMI = getAnalysisIfAvailable<BasicBlockMatchingAndInference>()) {
+      ExpClusterInfo = createBBClusterInfoForFunction(MF, BMI);
+    } else {
+      ExpClusterInfo = getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+                           .getClusterInfoForFunction(MF.getName());
+    }
+    if (!ExpClusterInfo.first)
       return false;
-    for (auto &BBClusterInfo : ClusterInfo) {
+    for (auto &BBClusterInfo : ExpClusterInfo.second) {
       FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo);
     }
   }
@@ -402,6 +479,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
 void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  AU.addUsedIfAvailable<BasicBlockMatchingAndInference>();
   AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
   AU.addUsedIfAvailable<MachinePostDominatorTreeWrapperPass>();
   MachineFunctionPass::getAnalysisUsage(AU);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 485b44ae4c4aa..7168906deffca 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -91,6 +91,15 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
+std::pair<bool, FunctionPathAndClusterInfo>
+BasicBlockSectionsProfileReader::getFunctionPathAndClusterInfo(
+    StringRef FuncName) const {
+  auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
+  return R != ProgramPathAndClusterInfo.end()
+             ? std::pair(true, R->second)
+             : std::pair(false, FunctionPathAndClusterInfo());
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -512,6 +521,12 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
+std::pair<bool, FunctionPathAndClusterInfo>
+BasicBlockSectionsProfileReaderWrapperPass::getFunctionPathAndClusterInfo(
+    StringRef FuncName) const {
+  return BBSPR.getFunctionPathAndClusterInfo(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 4373c5397a3c6..17e27052b7b24 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_component_library(LLVMCodeGen
   BasicBlockSections.cpp
   BasicBlockPathCloning.cpp
   BasicBlockSectionsProfileReader.cpp
+  BasicBlockMatchingAndInference.cpp
   CalcSpillWeights.cpp
   CallBrPrepare.cpp
   CallingConvLower.cpp
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 10b723887b21f..0f1fb506d4a73 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -272,6 +272,12 @@ static cl::opt<bool>
                     cl::desc("Split static data sections into hot and cold "
                              "sections using profile information"));
 
+/// Enable matching and inference when using propeller.
+static cl::opt<bool>
+    PropellerMatchInfer("propeller-match-infer",
+                        cl::desc("Use match&infer to evaluate stale profile"),
+                        cl::init(false), cl::Optional);
+
 cl::opt<bool> EmitBBHash(
     "emit-bb-hash",
     cl::desc(
@@ -1287,12 +1293,15 @@ void TargetPassConfig::addMachinePasses() {
   // address map (or both).
   if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||
       TM->Options.BBAddrMap) {
-    if (EmitBBHash)
+    if (EmitBBHash || PropellerMatchInfer)
       addPass(llvm::createMachineBlockHashInfoPass());
     if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
-      addPass(llvm::createBasicBlockPathCloningPass());
+      if (PropellerMatchInfer)
+        addPass(llvm::createBasicBlockMatchingAndInferencePass());
+      else
+        addPass(llvm::createBasicBlockPathCloningPass());
     }
     addPass(llvm::createBasicBlockSectionsPass());
   }
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 53bcaa6d3df03..934d1589c4a2e 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -1174,8 +1174,6 @@ std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
     else
       CostInc = Params.CostJumpUnknownInc;
     CostDec = 0;
-  } else {
-    assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
   }
   return std::make_pair(CostInc, CostDec);
 }
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll
new file mode 100644
index 0000000000000..0ce3a522b932d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-bb-hash.ll
@@ -0,0 +1,99 @@
+; BB cluster section tests when using edges profile and basic block hashes to generate clusters.
+;
+; Test1: Basic blocks #0 (entry), #1 and #3 will be placed in the same section.
+; The rest will be placed in the cold section.
+;
+; RUN: llc %s -O0 -mtriple=x86_64-pc-linux -function-sections -filetype=obj -basic-block-address-map -emit-bb-hash -o %t.o
+;
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f foo' >> %t1
+; RUN: echo 'g 0:100,1:100,2:0 1:100,3:100 2:0,3:0 3:100' >> %t1
+;
+; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP 
+; and put them into the basic blocks sections profile.
+; RUN: llvm-readobj %t.o --bb-addr-map | \
+; RUN: awk 'BEGIN {printf "h"} \
+; RUN:     /ID: [0-9]+/ {id=$2} \
+; RUN:     /Hash: 0x[0-9A-Fa-f]+/ {gsub(/^0x/, "", $2); hash=$2; printf " %%s:%%s", id, hash} \
+; RUN:     END {print ""}' \
+; RUN: >> %t1
+;
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t1 -propeller-match-infer | \
+; RUN: FileCheck %s -check-prefix=LINUX-SECTIONS1
+;
+; Test2: Basic #0 (entry), #2 and #3 will be placed in the same section.
+; The rest will be placed in the cold section.
+;
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f foo' >> %t2
+; RUN: echo 'g 0:100,1:0,2:100 1:0,3:0 2:100,3:100 3:100' >> %t2
+;
+; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP 
+; and put them into the basic blocks sections profile.
+; RUN: llvm-readobj %t.o --bb-addr-map | \
+; RUN: awk 'BEGIN {printf "h"} \
+; RUN:     /ID: [0-9]+/ {id=$2} \
+; RUN:     /Hash: 0x[0-9A-Fa-f]+/ {gsub(/^0x/, "", $2); hash=$2; printf " %%s:%%s", id, hash} \
+; RUN:     END {print ""}' \
+; RUN: >> %t2
+;
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t2 -propeller-match-infer | \
+; RUN: FileCheck %s -check-prefix=LINUX-SECTIONS2
+
+define void @foo(i1 zeroext) nounwind {
+  %2 = alloca i8, align 1
+  %3 = zext i1 %0 to i8
+  store i8 %3, ptr %2, align 1
+  %4 = load i8, ptr %2, align 1
+  %5 = trunc i8 %4 to i1
+  br i1 %5, label %6, label %8
+
+6:                                                ; preds = %1
+  %7 = call i32 @bar()
+  br label %10
+
+8:                                                ; preds = %1
+  %9 = call i32 @baz()
+  br label %10
+
+10:                                               ; preds = %8, %6
+  ret void
+}
+
+declare i32 @bar() #1
+
+declare i32 @baz() #1
+
+; LINUX-SECTIONS1:         .section        .text.foo,"ax", at progbits
+; LINUX-SECTIONS1-NOT:     .section
+; LINUX-SECTIONS1-LABEL:   foo:
+; LINUX-SECTIONS1-NOT:     .section
+; LINUX-SECTIONS1-NOT:     .LBB_END0_{{0-9}}+
+; LINUX-SECTIONS1-LABEL:   # %bb.1:
+; LINUX-SECTIONS1-NOT:     .section
+; LINUX-SECTIONS1-NOT:     .LBB_END0_{{0-9}}+
+; LINUX-SECTIONS1-LABEL:   .LBB0_3:
+; LINUX-SECTIONS1-LABEL:   .LBB_END0_3:
+; LINUX-SECTIONS1-NEXT:    .section        .text.split.foo,"ax", at progbits
+; LINUX-SECTIONS1-LABEL:   foo.cold:
+; LINUX-SECTIONS1-LABEL:   .LBB_END0_2:
+; LINUX-SECTIONS1-NEXT:    .size   foo.cold, .LBB_END0_2-foo.cold
+; LINUX-SECTIONS1-LABEL:   .Lfunc_end0:
+; LINUX-SECTIONS1-NEXT:    .size foo, .Lfunc_end0-foo
+
+; LINUX-SECTIONS2:         .section        .text.foo,"ax", at progbits
+; LINUX-SECTIONS2-NOT:     .section
+; LINUX-SECTIONS2-LABEL:   foo:
+; LINUX-SECTIONS2-NOT:     .section
+; LINUX-SECTIONS2-NOT:     .LBB_END0_{{0-9}}+
+; LINUX-SECTIONS2-LABEL:   # %bb.2:
+; LINUX-SECTIONS2-NOT:     .section
+; LINUX-SECTIONS2-NOT:     .LBB_END0_{{0-9}}+
+; LINUX-SECTIONS2-LABEL:   .LBB0_3:
+; LINUX-SECTIONS2-LABEL:   .LBB_END0_3:
+; LINUX-SECTIONS2-NEXT:    .section        .text.split.foo,"ax", at progbits
+; LINUX-SECTIONS2-LABEL:   foo.cold:
+; LINUX-SECTIONS2-LABEL:   .LBB_END0_1:
+; LINUX-SECTIONS2-NEXT:    .size   foo.cold, .LBB_END0_1-foo.cold
+; LINUX-SECTIONS2-LABEL:   .Lfunc_end0:
+; LINUX-SECTIONS2-NEXT:    .size foo, .Lfunc_end0-foo



More information about the llvm-commits mailing list