[llvm] f70e39e - [BasicBlockSections] Apply path cloning with -basic-block-sections. (#68860)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 27 21:49:43 PDT 2023


Author: Rahman Lavaee
Date: 2023-10-27T21:49:39-07:00
New Revision: f70e39ec173192058976805a2c51ac438bb2ff2f

URL: https://github.com/llvm/llvm-project/commit/f70e39ec173192058976805a2c51ac438bb2ff2f
DIFF: https://github.com/llvm/llvm-project/commit/f70e39ec173192058976805a2c51ac438bb2ff2f.diff

LOG: [BasicBlockSections] Apply path cloning with -basic-block-sections. (#68860)

https://github.com/llvm/llvm-project/commit/28b912687900bc0a67cd61c374fce296b09963c4
introduced the path cloning format in the basic-block-sections profile.

This PR validates and applies path clonings. 
A path cloning is valid if all of these conditions hold:
  1. All bb ids in the path are mapped to existing blocks.
  2. Every two consecutive bb ids in the path have a successor relationship
     in the CFG.
  3. The path does not include a block with indirect branches, except
     possibly as the last block.
 
Applying a path cloning involves cloning all blocks in the path (except
the first one) and setting up their branches.
Once all clonings are applied, the cluster information is used to guide
block layout in the modified function.

Added: 
    llvm/lib/CodeGen/BasicBlockPathCloning.cpp
    llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll
    llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll
    llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll
    llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll
    llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
    llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
    llvm/include/llvm/CodeGen/MachineBasicBlock.h
    llvm/include/llvm/CodeGen/MachineFunction.h
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/InitializePasses.h
    llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/lib/CodeGen/BasicBlockSections.cpp
    llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
    llvm/lib/CodeGen/CMakeLists.txt
    llvm/lib/CodeGen/CodeGen.cpp
    llvm/lib/CodeGen/MIRParser/MIParser.cpp
    llvm/lib/CodeGen/MachineBasicBlock.cpp
    llvm/lib/CodeGen/MachineFunction.cpp
    llvm/lib/CodeGen/TargetInstrInfo.cpp
    llvm/lib/CodeGen/TargetPassConfig.cpp
    llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
index d43f399b2c310a3..292abf8b2b51628 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h
@@ -27,6 +27,15 @@ void sortBasicBlocksAndUpdateBranches(MachineFunction &MF,
 
 void avoidZeroOffsetLandingPad(MachineFunction &MF);
 
+/// This checks if the source of this function has drifted since this binary was
+/// profiled previously.
+/// For now, we are piggybacking on what PGO does to
+/// detect this with instrumented profiles.  PGO emits a hash of the IR and
+/// checks if the hash has changed.  Advanced basic block layout is usually done
+/// on top of PGO optimized binaries and hence this check works well in
+/// practice.
+bool hasInstrProfHashMismatch(MachineFunction &MF);
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H

diff  --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 6e01dfd11ee6dad..dfb8d5d9f2f5d33 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -19,33 +19,22 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
 
 namespace llvm {
 
-// This structure represents a unique ID for every block specified in the
-// input profile.
-struct ProfileBBID {
-  // Basic block id associated with `MachineBasicBlock::BBID`.
-  unsigned BBID;
-  // The clone id associated with the block. This is zero for the original
-  // block. For the cloned ones, it is equal to 1 + index of the associated
-  // path in `FunctionPathAndClusterInfo::ClonePaths`.
-  unsigned CloneID;
-};
-
 // This struct represents the cluster information for a machine basic block,
-// which is specifed by a unique ID. This templated struct is used for both the
-// raw input profile (as `BBClusterInfo<ProfileBBID>`) and the processed profile
-// after applying the clonings (as `BBClusterInfo<unsigned>`).
-template <typename BBIDType> struct BBClusterInfo {
+// which is specified by a unique ID (`MachineBasicBlock::BBID`).
+struct BBClusterInfo {
   // Basic block ID.
-  BBIDType BasicBlockID;
+  UniqueBBID BBID;
   // Cluster ID this basic block belongs to.
   unsigned ClusterID;
   // Position of basic block within the cluster.
@@ -54,31 +43,31 @@ template <typename BBIDType> struct BBClusterInfo {
 
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
-  // BB Cluster information specified by `ProfileBBID`s (before cloning).
-  SmallVector<BBClusterInfo<ProfileBBID>> ClusterInfo;
+  // BB Cluster information specified by `UniqueBBID`s.
+  SmallVector<BBClusterInfo> ClusterInfo;
   // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
   // the edge a -> b (a is not cloned). The index of the path in this vector
-  // determines the `ProfileBBID::CloneID` of the cloned blocks in that path.
+  // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
 };
 
-// Provides DenseMapInfo for ProfileBBID.
-template <> struct DenseMapInfo<ProfileBBID> {
-  static inline ProfileBBID getEmptyKey() {
+// Provides DenseMapInfo for UniqueBBID.
+template <> struct DenseMapInfo<UniqueBBID> {
+  static inline UniqueBBID getEmptyKey() {
     unsigned EmptyKey = DenseMapInfo<unsigned>::getEmptyKey();
-    return ProfileBBID{EmptyKey, EmptyKey};
+    return UniqueBBID{EmptyKey, EmptyKey};
   }
-  static inline ProfileBBID getTombstoneKey() {
+  static inline UniqueBBID getTombstoneKey() {
     unsigned TombstoneKey = DenseMapInfo<unsigned>::getTombstoneKey();
-    return ProfileBBID{TombstoneKey, TombstoneKey};
+    return UniqueBBID{TombstoneKey, TombstoneKey};
   }
-  static unsigned getHashValue(const ProfileBBID &Val) {
+  static unsigned getHashValue(const UniqueBBID &Val) {
     std::pair<unsigned, unsigned> PairVal =
-        std::make_pair(Val.BBID, Val.CloneID);
+        std::make_pair(Val.BaseID, Val.CloneID);
     return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
   }
-  static bool isEqual(const ProfileBBID &LHS, const ProfileBBID &RHS) {
-    return DenseMapInfo<unsigned>::isEqual(LHS.BBID, RHS.BBID) &&
+  static bool isEqual(const UniqueBBID &LHS, const UniqueBBID &RHS) {
+    return DenseMapInfo<unsigned>::isEqual(LHS.BaseID, RHS.BaseID) &&
            DenseMapInfo<unsigned>::isEqual(LHS.CloneID, RHS.CloneID);
   }
 };
@@ -113,8 +102,12 @@ class BasicBlockSectionsProfileReader : public ImmutablePass {
   // function. If the first element is true and the second element is empty, it
   // means unique basic block sections are desired for all basic blocks of the
   // function.
-  std::pair<bool, FunctionPathAndClusterInfo>
-  getPathAndClusterInfoForFunction(StringRef FuncName) const;
+  std::pair<bool, SmallVector<BBClusterInfo>>
+  getClusterInfoForFunction(StringRef FuncName) const;
+
+  // Returns the path clonings for the given function.
+  SmallVector<SmallVector<unsigned>>
+  getClonePathsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
@@ -134,11 +127,11 @@ class BasicBlockSectionsProfileReader : public ImmutablePass {
         inconvertibleErrorCode());
   }
 
-  // Parses a `ProfileBBID` from `S`. `S` must be in the form "<bbid>"
+  // Parses a `UniqueBBID` from `S`. `S` must be in the form "<bbid>"
   // (representing an original block) or "<bbid>.<cloneid>" (representing a
   // cloned block) where bbid is a non-negative integer and cloneid is a
   // positive integer.
-  Expected<ProfileBBID> parseProfileBBID(StringRef S) const;
+  Expected<UniqueBBID> parseUniqueBBID(StringRef S) const;
 
   // Reads the basic block sections profile for functions in this module.
   Error ReadProfile();

diff  --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 15c4fcd8399c181..4b5336fac33ea46 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -74,6 +74,13 @@ struct MBBSectionID {
   MBBSectionID(SectionType T) : Type(T), Number(0) {}
 };
 
+// Unique ID of a basic block, as used by the basic block sections profile.
+struct UniqueBBID {
+  unsigned BaseID;
+  // Zero for original blocks; cloned blocks get a non-zero CloneID.
+  unsigned CloneID;
+};
+
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -180,7 +187,7 @@ class MachineBasicBlock
 
   /// Fixed unique ID assigned to this basic block upon creation. Used with
   /// basic block sections and basic block labels.
-  std::optional<unsigned> BBID;
+  std::optional<UniqueBBID> BBID;
 
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
@@ -633,7 +640,7 @@ class MachineBasicBlock
 
   void setIsEndSection(bool V = true) { IsEndSection = V; }
 
-  std::optional<unsigned> getBBID() const { return BBID; }
+  std::optional<UniqueBBID> getBBID() const { return BBID; }
 
   /// Returns the section ID of this basic block.
   MBBSectionID getSectionID() const { return SectionID; }
@@ -645,7 +652,7 @@ class MachineBasicBlock
   }
 
   /// Sets the fixed BBID of this basic block.
-  void setBBID(unsigned V) {
+  void setBBID(const UniqueBBID &V) {
     assert(!BBID.has_value() && "Cannot change BBID.");
     BBID = V;
   }
@@ -753,7 +760,7 @@ class MachineBasicBlock
   ///
   /// This is useful when doing a partial clone of successors. Afterward, the
   /// probabilities may need to be normalized.
-  void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
+  void copySuccessor(const MachineBasicBlock *Orig, succ_iterator I);
 
   /// Split the old successor into old plus new and updates the probability
   /// info.

diff  --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 8e3253d492dc950..05c9b14a423cda1 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1013,8 +1013,11 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
   void deleteMachineInstr(MachineInstr *MI);
 
   /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
-  /// instead of `new MachineBasicBlock'.
-  MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr);
+  /// instead of `new MachineBasicBlock'. Sets `MachineBasicBlock::BBID` if
+  /// basic-block-sections is enabled for the function.
+  MachineBasicBlock *
+  CreateMachineBasicBlock(const BasicBlock *BB = nullptr,
+                          std::optional<UniqueBBID> BBID = std::nullopt);
 
   /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
   void deleteMachineBasicBlock(MachineBasicBlock *MBB);

diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 8d14eef949e91b4..712048017bca1a1 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -65,6 +65,8 @@ namespace llvm {
   /// basic blocks and is enabled with -fbasic-block-sections.
   MachineFunctionPass *createBasicBlockSectionsPass();
 
+  MachineFunctionPass *createBasicBlockPathCloningPass();
+
   /// createMachineFunctionSplitterPass - This pass splits machine functions
   /// using profile information.
   MachineFunctionPass *createMachineFunctionSplitterPass();

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 9fbd431c227789d..fafae8b5ecd7a7f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -55,6 +55,7 @@ void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
 void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &);
 void initializeAssumptionCacheTrackerPass(PassRegistry&);
 void initializeAtomicExpandPass(PassRegistry&);
+void initializeBasicBlockPathCloningPass(PassRegistry &);
 void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &);
 void initializeBasicBlockSectionsPass(PassRegistry &);
 void initializeBarrierNoopPass(PassRegistry&);

diff  --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b1a670fa3c2555c..fd440718fd37837 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1372,7 +1372,11 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
     if (BBAddrMapVersion > 1) {
       OutStreamer->AddComment("BB id");
       // Emit the BB ID for this basic block.
-      OutStreamer->emitULEB128IntValue(*MBB.getBBID());
+      // We only emit BaseID since CloneID is unset for
+      // basic-block-sections=labels.
+      // TODO: Emit the full BBID when labels and sections can be mixed
+      // together.
+      OutStreamer->emitULEB128IntValue(MBB.getBBID()->BaseID);
     }
     // Emit the basic block offset relative to the end of the previous block.
     // This is zero unless the block is padded due to alignment.
@@ -1932,30 +1936,33 @@ void AsmPrinter::emitFunctionBody() {
   // MBB profile information has been set
   if (MBBProfileDumpFileOutput && !MF->empty() &&
       MF->getFunction().getEntryCount()) {
-    if (!MF->hasBBLabels())
+    if (!MF->hasBBLabels()) {
       MF->getContext().reportError(
           SMLoc(),
           "Unable to find BB labels for MBB profile dump. -mbb-profile-dump "
           "must be called with -basic-block-sections=labels");
-    MachineBlockFrequencyInfo &MBFI =
-        getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
-    // The entry count and the entry basic block frequency aren't the same. We
-    // want to capture "absolute" frequencies, i.e. the frequency with which a
-    // MBB is executed when the program is executed. From there, we can derive
-    // Function-relative frequencies (divide by the value for the first MBB).
-    // We also have the information about frequency with which functions
-    // were called. This helps, for example, in a type of integration tests
-    // where we want to cross-validate the compiler's profile with a real
-    // profile.
-    // Using double precision because uint64 values used to encode mbb
-    // "frequencies" may be quite large.
-    const double EntryCount =
-        static_cast<double>(MF->getFunction().getEntryCount()->getCount());
-    for (const auto &MBB : *MF) {
-      const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
-      const double AbsMBBFreq = MBBRelFreq * EntryCount;
-      *MBBProfileDumpFileOutput.get()
-          << MF->getName() << "," << MBB.getBBID() << "," << AbsMBBFreq << "\n";
+    } else {
+      MachineBlockFrequencyInfo &MBFI =
+          getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+      // The entry count and the entry basic block frequency aren't the same. We
+      // want to capture "absolute" frequencies, i.e. the frequency with which a
+      // MBB is executed when the program is executed. From there, we can derive
+      // Function-relative frequencies (divide by the value for the first MBB).
+      // We also have the information about frequency with which functions
+      // were called. This helps, for example, in a type of integration tests
+      // where we want to cross-validate the compiler's profile with a real
+      // profile.
+      // Using double precision because uint64 values used to encode mbb
+      // "frequencies" may be quite large.
+      const double EntryCount =
+          static_cast<double>(MF->getFunction().getEntryCount()->getCount());
+      for (const auto &MBB : *MF) {
+        const double MBBRelFreq = MBFI.getBlockFreqRelativeToEntryBlock(&MBB);
+        const double AbsMBBFreq = MBBRelFreq * EntryCount;
+        *MBBProfileDumpFileOutput.get()
+            << MF->getName() << "," << MBB.getBBID()->BaseID << ","
+            << AbsMBBFreq << "\n";
+      }
     }
   }
 }

diff  --git a/llvm/lib/CodeGen/BasicBlockPathCloning.cpp b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
new file mode 100644
index 000000000000000..5d5f3c3da48160d
--- /dev/null
+++ b/llvm/lib/CodeGen/BasicBlockPathCloning.cpp
@@ -0,0 +1,245 @@
+//===-- BasicBlockPathCloning.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// BasicBlockPathCloning implementation.
+///
+/// The purpose of this pass is to clone basic block paths based on information
+/// provided by the -fbasic-block-sections=list option.
+/// Please refer to BasicBlockSectionsProfileReader.cpp to see a path cloning
+/// example.
+//===----------------------------------------------------------------------===//
+// This pass clones the machine basic blocks along the given paths and sets up
+// the CFG. It assigns BBIDs to the cloned blocks so that the
+// `BasicBlockSections` pass can correctly map the cluster information to the
+// blocks. The cloned block's BBID will have the same BaseID as the original
+// block, but will get a unique non-zero CloneID (original blocks all have zero
+// CloneIDs). This pass applies a path cloning if it satisfies the following
+// conditions:
+//   1. All BBIDs in the path should be mapped to existing blocks.
+//   2. Each two consecutive BBIDs in the path must have a successor
+//   relationship in the CFG.
+//   3. The path should not include a block with indirect branches, except for
+//   the last block.
+// If a path does not satisfy all three conditions, it will be rejected, but the
+// CloneIDs for its (supposed to be cloned) blocks will be bypassed to make sure
+// that the `BasicBlockSections` pass can map cluster info correctly to the
+// actually-cloned blocks.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+// Clones the given block and assigns the given `CloneID` to its BBID. Copies
+// the instructions into the new block and sets up its successors.
+MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock &OrigBB,
+                                          unsigned CloneID) {
+  auto &MF = *OrigBB.getParent();
+  auto TII = MF.getSubtarget().getInstrInfo();
+  // Create the clone block and set its BBID based on the original block.
+  MachineBasicBlock *CloneBB = MF.CreateMachineBasicBlock(
+      OrigBB.getBasicBlock(), UniqueBBID{OrigBB.getBBID()->BaseID, CloneID});
+  MF.push_back(CloneBB);
+
+  // Copy the instructions.
+  for (auto &I : OrigBB.instrs()) {
+    // Bundled instructions are duplicated together.
+    if (I.isBundledWithPred())
+      continue;
+    TII->duplicate(*CloneBB, CloneBB->end(), I);
+  }
+
+  // Add the successors of the original block as the new block's successors.
+  // We set the predecessor after returning from this call.
+  for (auto SI = OrigBB.succ_begin(), SE = OrigBB.succ_end(); SI != SE; ++SI)
+    CloneBB->copySuccessor(&OrigBB, SI);
+
+  if (auto FT = OrigBB.getFallThrough(/*JumpToFallThrough=*/false)) {
+    // The original block has an implicit fall through.
+    // Insert an explicit unconditional jump from the cloned block to the
+    // fallthrough block. Technically, this is only needed for the last block
+    // of the path, but we do it for all clones for consistency.
+    TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc());
+  }
+  return CloneBB;
+}
+
+// Returns true if we can legally apply the cloning represented by `ClonePath`.
+// `BBIDToBlock` contains the original basic blocks in function `MF` keyed by
+// their `BBID::BaseID`.
+bool IsValidCloning(const MachineFunction &MF,
+                    const DenseMap<unsigned, MachineBasicBlock *> &BBIDToBlock,
+                    const SmallVector<unsigned> &ClonePath) {
+  const MachineBasicBlock *PrevBB = nullptr;
+  for (size_t I = 0; I < ClonePath.size(); ++I) {
+    unsigned BBID = ClonePath[I];
+    const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID);
+    if (!PathBB) {
+      WithColor::warning() << "no block with id " << BBID << " in function "
+                           << MF.getName() << "\n";
+      return false;
+    }
+
+    if (PrevBB) {
+      if (!PrevBB->isSuccessor(PathBB)) {
+        WithColor::warning()
+            << "block #" << BBID << " is not a successor of block #"
+            << PrevBB->getBBID()->BaseID << " in function " << MF.getName()
+            << "\n";
+        return false;
+      }
+
+      for (auto &MI : *PathBB) {
+        // Avoid cloning when the block contains non-duplicable instructions.
+        // CFI instructions are marked as non-duplicable only because of Darwin,
+        // so we exclude them from this check.
+        if (MI.isNotDuplicable() && !MI.isCFIInstruction()) {
+          WithColor::warning()
+              << "block #" << BBID
+              << " has non-duplicable instructions in function " << MF.getName()
+              << "\n";
+          return false;
+        }
+      }
+    }
+
+    if (I != ClonePath.size() - 1 && !PathBB->empty() &&
+        PathBB->back().isIndirectBranch()) {
+      WithColor::warning()
+          << "block #" << BBID
+          << " has indirect branch and appears as the non-tail block of a "
+             "path in function "
+          << MF.getName() << "\n";
+      return false;
+    }
+    PrevBB = PathBB;
+  }
+  return true;
+}
+
+// Applies all clonings specified in `ClonePaths` to `MF`. Returns true
+// if any clonings have been applied.
+bool ApplyCloning(MachineFunction &MF,
+                  const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+  if (ClonePaths.empty())
+    return false;
+  bool AnyPathsCloned = false;
+  // Map from the BaseIDs of the original blocks to the `MachineBasicBlock`s.
+  DenseMap<unsigned, MachineBasicBlock *> BBIDToBlock;
+  for (auto &BB : MF)
+    BBIDToBlock.try_emplace(BB.getBBID()->BaseID, &BB);
+
+  DenseMap<unsigned, unsigned> NClonesForBBID;
+  auto TII = MF.getSubtarget().getInstrInfo();
+  for (const auto &ClonePath : ClonePaths) {
+    if (!IsValidCloning(MF, BBIDToBlock, ClonePath)) {
+      // We still need to increment the number of clones so we can map
+      // to the cluster info correctly.
+      for (unsigned BBID : ClonePath)
+        ++NClonesForBBID[BBID];
+      continue;
+    }
+    MachineBasicBlock *PrevBB = nullptr;
+    for (unsigned BBID : ClonePath) {
+      MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID);
+      if (PrevBB == nullptr) {
+        // The first block in the path is not cloned. We only need to make it
+        // branch to the next cloned block in the path. Here, we make its
+        // fallthrough explicit so we can change it later.
+        if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+          TII->insertUnconditionalBranch(*OrigBB, FT,
+                                         OrigBB->findBranchDebugLoc());
+        }
+        PrevBB = OrigBB;
+        continue;
+      }
+      MachineBasicBlock *CloneBB =
+          CloneMachineBasicBlock(*OrigBB, ++NClonesForBBID[BBID]);
+
+      // Set up the previous block in the path to jump to the clone. This also
+      // transfers the successor/predecessor relationship of PrevBB and OrigBB
+      // to that of PrevBB and CloneBB.
+      PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB);
+
+      // Copy the livein set.
+      for (auto &LiveIn : OrigBB->liveins())
+        CloneBB->addLiveIn(LiveIn);
+
+      PrevBB = CloneBB;
+    }
+    AnyPathsCloned = true;
+  }
+  return AnyPathsCloned;
+}
+} // end anonymous namespace
+
+namespace llvm {
+class BasicBlockPathCloning : public MachineFunctionPass {
+public:
+  static char ID;
+
+  BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
+
+  BasicBlockPathCloning() : MachineFunctionPass(ID) {
+    initializeBasicBlockPathCloningPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Basic Block Path Cloning"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Applies the path clonings specified in the basic block sections profile
+  /// for the given function. Returns true if any cloning has been applied.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+
+char BasicBlockPathCloning::ID = 0;
+INITIALIZE_PASS_BEGIN(
+    BasicBlockPathCloning, "bb-path-cloning",
+    "Applies path clonings for the -basic-block-sections=list option", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_END(
+    BasicBlockPathCloning, "bb-path-cloning",
+    "Applies path clonings for the -basic-block-sections=list option", false,
+    false)
+
+bool BasicBlockPathCloning::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+
+  return ApplyCloning(MF, getAnalysis<BasicBlockSectionsProfileReader>()
+                              .getClonePathsForFunction(MF.getName()));
+}
+
+void BasicBlockPathCloning::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicBlockSectionsProfileReader>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createBasicBlockPathCloningPass() {
+  return new BasicBlockPathCloning();
+}

diff  --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 632fd68d88b5c64..42997d2287d61d7 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -175,12 +175,12 @@ updateBranches(MachineFunction &MF,
 // clusters, they are moved into a single "Exception" section. Eventually,
 // clusters are ordered in increasing order of their IDs, with the "Exception"
 // and "Cold" succeeding all other clusters.
-// ClusterInfoByBBID represents the cluster information for basic blocks. It
+// FuncClusterInfo represents the cluster information for basic blocks. It
 // maps from BBID of basic blocks to their cluster information. If this is
 // empty, it means unique sections for all basic blocks in the function.
-static void assignSections(
-    MachineFunction &MF,
-    const DenseMap<unsigned, BBClusterInfo<unsigned>> &ClusterInfoByBBID) {
+static void
+assignSections(MachineFunction &MF,
+               const DenseMap<UniqueBBID, BBClusterInfo> &FuncClusterInfo) {
   assert(MF.hasBBSections() && "BB Sections is not set for function.");
   // This variable stores the section ID of the cluster containing eh_pads (if
   // all eh_pads are one cluster). If more than one cluster contain eh_pads, we
@@ -191,17 +191,17 @@ static void assignSections(
     // With the 'all' option, every basic block is placed in a unique section.
     // With the 'list' option, every basic block is placed in a section
     // associated with its cluster, unless we want individual unique sections
-    // for every basic block in this function (if ClusterInfoByBBID is empty).
+    // for every basic block in this function (if FuncClusterInfo is empty).
     if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
-        ClusterInfoByBBID.empty()) {
+        FuncClusterInfo.empty()) {
       // If unique sections are desired for all basic blocks of the function, we
       // set every basic block's section ID equal to its original position in
       // the layout (which is equal to its number). This ensures that basic
       // blocks are ordered canonically.
       MBB.setSectionID(MBB.getNumber());
     } else {
-      auto I = ClusterInfoByBBID.find(*MBB.getBBID());
-      if (I != ClusterInfoByBBID.end()) {
+      auto I = FuncClusterInfo.find(*MBB.getBBID());
+      if (I != FuncClusterInfo.end()) {
         MBB.setSectionID(I->second.ClusterID);
       } else {
         // BB goes into the special cold section if it is not specified in the
@@ -264,12 +264,7 @@ void llvm::avoidZeroOffsetLandingPad(MachineFunction &MF) {
   }
 }
 
-// This checks if the source of this function has drifted since this binary was
-// profiled previously.  For now, we are piggy backing on what PGO does to
-// detect this with instrumented profiles.  PGO emits an hash of the IR and
-// checks if the hash has changed.  Advanced basic block layout is usually done
-// on top of PGO optimized binaries and hence this check works well in practice.
-static bool hasInstrProfHashMismatch(MachineFunction &MF) {
+bool llvm::hasInstrProfHashMismatch(MachineFunction &MF) {
   if (!BBSectionsDetectSourceDrift)
     return false;
 
@@ -290,7 +285,7 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
   assert(BBSectionsType != BasicBlockSection::None &&
          "BB Sections not enabled!");
 
-  // Check for source drift.  If the source has changed since the profiles
+  // Check for source drift. If the source has changed since the profiles
   // were obtained, optimizing basic blocks might be sub-optimal.
   // This only applies to BasicBlockSection::List as it creates
   // clusters of basic blocks using basic block ids. Source drift can
@@ -298,38 +293,30 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
   // regards to performance.
   if (BBSectionsType == BasicBlockSection::List &&
       hasInstrProfHashMismatch(MF))
-    return true;
+    return false;
   // Renumber blocks before sorting them. This is useful for accessing the
   // original layout positions and finding the original fallthroughs.
   MF.RenumberBlocks();
 
   if (BBSectionsType == BasicBlockSection::Labels) {
     MF.setBBSectionsType(BBSectionsType);
-    return true;
+    return false;
   }
 
-  DenseMap<unsigned, BBClusterInfo<unsigned>> ClusterInfoByBBID;
+  DenseMap<UniqueBBID, BBClusterInfo> FuncClusterInfo;
   if (BBSectionsType == BasicBlockSection::List) {
-    auto [HasProfile, PathAndClusterInfo] =
+    auto [HasProfile, ClusterInfo] =
         getAnalysis<BasicBlockSectionsProfileReader>()
-            .getPathAndClusterInfoForFunction(MF.getName());
+            .getClusterInfoForFunction(MF.getName());
     if (!HasProfile)
-      return true;
-    for (const BBClusterInfo<ProfileBBID> &BBP :
-         PathAndClusterInfo.ClusterInfo) {
-      // TODO: Apply the path cloning profile.
-      assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet");
-      const auto [I, Inserted] = ClusterInfoByBBID.try_emplace(
-          BBP.BasicBlockID.BBID,
-          BBClusterInfo<unsigned>{BBP.BasicBlockID.BBID, BBP.ClusterID,
-                                  BBP.PositionInCluster});
-      (void)I;
-      assert(Inserted && "Duplicate BBID found in profile");
+      return false;
+    for (auto &BBClusterInfo : ClusterInfo) {
+      FuncClusterInfo.try_emplace(BBClusterInfo.BBID, BBClusterInfo);
     }
   }
 
   MF.setBBSectionsType(BBSectionsType);
-  assignSections(MF, ClusterInfoByBBID);
+  assignSections(MF, FuncClusterInfo);
 
   // We make sure that the cluster including the entry basic block precedes all
   // other clusters.
@@ -363,8 +350,8 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
     // If the two basic block are in the same section, the order is decided by
     // their position within the section.
     if (XSectionID.Type == MBBSectionID::SectionType::Default)
-      return ClusterInfoByBBID.lookup(*X.getBBID()).PositionInCluster <
-             ClusterInfoByBBID.lookup(*Y.getBBID()).PositionInCluster;
+      return FuncClusterInfo.lookup(*X.getBBID()).PositionInCluster <
+             FuncClusterInfo.lookup(*Y.getBBID()).PositionInCluster;
     return X.getNumber() < Y.getNumber();
   };
 

diff  --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 6bb412a6c7534a6..96662378a869316 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -35,15 +35,15 @@ INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
                 "Reads and parses a basic block sections profile.", false,
                 false)
 
-Expected<ProfileBBID>
-BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const {
+Expected<UniqueBBID>
+BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
   SmallVector<StringRef, 2> Parts;
   S.split(Parts, '.');
   if (Parts.size() > 2)
     return createProfileParseError(Twine("unable to parse basic block id: '") +
                                    S + "'");
-  unsigned long long BBID;
-  if (getAsUnsignedInteger(Parts[0], 10, BBID))
+  unsigned long long BaseBBID;
+  if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
     return createProfileParseError(
         Twine("unable to parse BB id: '" + Parts[0]) +
         "': unsigned integer expected");
@@ -51,21 +51,27 @@ BasicBlockSectionsProfileReader::parseProfileBBID(StringRef S) const {
   if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
     return createProfileParseError(Twine("unable to parse clone id: '") +
                                    Parts[1] + "': unsigned integer expected");
-  return ProfileBBID{static_cast<unsigned>(BBID),
-                     static_cast<unsigned>(CloneID)};
+  return UniqueBBID{static_cast<unsigned>(BaseBBID),
+                    static_cast<unsigned>(CloneID)};
 }
 
 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
-  return getPathAndClusterInfoForFunction(FuncName).first;
+  return getClusterInfoForFunction(FuncName).first;
 }
 
-std::pair<bool, FunctionPathAndClusterInfo>
-BasicBlockSectionsProfileReader::getPathAndClusterInfoForFunction(
+std::pair<bool, SmallVector<BBClusterInfo>>
+BasicBlockSectionsProfileReader::getClusterInfoForFunction(
     StringRef FuncName) const {
   auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
   return R != ProgramPathAndClusterInfo.end()
-             ? std::pair(true, R->second)
-             : std::pair(false, FunctionPathAndClusterInfo());
+             ? std::pair(true, R->second.ClusterInfo)
+             : std::pair(false, SmallVector<BBClusterInfo>());
+}
+
+SmallVector<SmallVector<unsigned>>
+BasicBlockSectionsProfileReader::getClonePathsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
 }
 
 // Reads the version 1 basic block sections profile. Profile for each function
@@ -133,7 +139,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
 
   // Temporary set to ensure every basic block ID appears once in the clusters
   // of a function.
-  DenseSet<ProfileBBID> FuncBBIDs;
+  DenseSet<UniqueBBID> FuncBBIDs;
 
   // Debug-info-based module filename for the current function. Empty string
   // means no filename.
@@ -199,7 +205,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       // Reset current cluster position.
       CurrentPosition = 0;
       for (auto BasicBlockIDStr : Values) {
-        auto BasicBlockID = parseProfileBBID(BasicBlockIDStr);
+        auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
         if (!BasicBlockID)
           return BasicBlockID.takeError();
         if (!FuncBBIDs.insert(*BasicBlockID).second)
@@ -207,28 +213,32 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
               Twine("duplicate basic block id found '") + BasicBlockIDStr +
               "'");
 
-        if (!BasicBlockID->BBID && CurrentPosition)
+        if (!BasicBlockID->BaseID && CurrentPosition)
           return createProfileParseError(
               "entry BB (0) does not begin a cluster.");
 
-        FI->second.ClusterInfo.emplace_back(BBClusterInfo<ProfileBBID>{
+        FI->second.ClusterInfo.emplace_back(BBClusterInfo{
             *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
       }
       CurrentCluster++;
       continue;
     case 'p': { // Basic block cloning path specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
       SmallSet<unsigned, 5> BBsInPath;
       FI->second.ClonePaths.push_back({});
       for (size_t I = 0; I < Values.size(); ++I) {
-        auto BBIDStr = Values[I];
-        unsigned long long BBID = 0;
-        if (getAsUnsignedInteger(BBIDStr, 10, BBID))
+        auto BaseBBIDStr = Values[I];
+        unsigned long long BaseBBID = 0;
+        if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
           return createProfileParseError(Twine("unsigned integer expected: '") +
-                                         BBIDStr + "'");
-        if (I != 0 && !BBsInPath.insert(BBID).second)
+                                         BaseBBIDStr + "'");
+        if (I != 0 && !BBsInPath.insert(BaseBBID).second)
           return createProfileParseError(
-              Twine("duplicate cloned block in path: '") + BBIDStr + "'");
-        FI->second.ClonePaths.back().push_back(BBID);
+              Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
+        FI->second.ClonePaths.back().push_back(BaseBBID);
       }
       continue;
     }
@@ -282,9 +292,9 @@ Error BasicBlockSectionsProfileReader::ReadV0Profile() {
               "entry BB (0) does not begin a cluster");
 
         FI->second.ClusterInfo.emplace_back(
-            BBClusterInfo<ProfileBBID>({{static_cast<unsigned>(BBID), 0},
-                                        CurrentCluster,
-                                        CurrentPosition++}));
+            BBClusterInfo({{static_cast<unsigned>(BBID), 0},
+                           CurrentCluster,
+                           CurrentPosition++}));
       }
       CurrentCluster++;
     } else {

diff  --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 389c70d04f17ba3..df2d1831ee5fdbf 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -46,6 +46,7 @@ add_llvm_component_library(LLVMCodeGen
   BranchRelaxation.cpp
   BreakFalseDeps.cpp
   BasicBlockSections.cpp
+  BasicBlockPathCloning.cpp
   BasicBlockSectionsProfileReader.cpp
   CalcSpillWeights.cpp
   CallBrPrepare.cpp

diff  --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 6272b654b329539..79a95ee0d747a1c 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -20,6 +20,7 @@ using namespace llvm;
 void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeAssignmentTrackingAnalysisPass(Registry);
   initializeAtomicExpandPass(Registry);
+  initializeBasicBlockPathCloningPass(Registry);
   initializeBasicBlockSectionsPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeBranchRelaxationPass(Registry);

diff  --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 65280c65b68781e..c01b34d6f490b0e 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -500,7 +500,7 @@ class MIParser {
   bool parseAlignment(uint64_t &Alignment);
   bool parseAddrspace(unsigned &Addrspace);
   bool parseSectionID(std::optional<MBBSectionID> &SID);
-  bool parseBBID(std::optional<unsigned> &BBID);
+  bool parseBBID(std::optional<UniqueBBID> &BBID);
   bool parseCallFrameSize(unsigned &CallFrameSize);
   bool parseOperandsOffset(MachineOperand &Op);
   bool parseIRValue(const Value *&V);
@@ -666,14 +666,20 @@ bool MIParser::parseSectionID(std::optional<MBBSectionID> &SID) {
 }
 
 // Parse Machine Basic Block ID.
-bool MIParser::parseBBID(std::optional<unsigned> &BBID) {
+bool MIParser::parseBBID(std::optional<UniqueBBID> &BBID) {
   assert(Token.is(MIToken::kw_bb_id));
   lex();
-  unsigned Value = 0;
-  if (getUnsigned(Value))
+  unsigned BaseID = 0;
+  unsigned CloneID = 0;
+  if (getUnsigned(BaseID))
     return error("Unknown BB ID");
-  BBID = Value;
   lex();
+  if (Token.is(MIToken::IntegerLiteral)) {
+    if (getUnsigned(CloneID))
+      return error("Unknown Clone ID");
+    lex();
+  }
+  BBID = {BaseID, CloneID};
   return false;
 }
 
@@ -705,7 +711,7 @@ bool MIParser::parseBasicBlockDefinition(
   bool IsEHFuncletEntry = false;
   std::optional<MBBSectionID> SectionID;
   uint64_t Alignment = 0;
-  std::optional<unsigned> BBID;
+  std::optional<UniqueBBID> BBID;
   unsigned CallFrameSize = 0;
   BasicBlock *BB = nullptr;
   if (consumeIfPresent(MIToken::lparen)) {

diff  --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5f9e4a66c0d22ed..ef8e1bd63024fa7 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -567,7 +567,9 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
     }
     if (getBBID().has_value()) {
       os << (hasAttributes ? ", " : " (");
-      os << "bb_id " << *getBBID();
+      os << "bb_id " << getBBID()->BaseID;
+      if (getBBID()->CloneID != 0)
+        os << " " << getBBID()->CloneID;
       hasAttributes = true;
     }
     if (CallFrameSize != 0) {
@@ -886,7 +888,7 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
   removeSuccessor(OldI);
 }
 
-void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+void MachineBasicBlock::copySuccessor(const MachineBasicBlock *Orig,
                                       succ_iterator I) {
   if (!Orig->Probs.empty())
     addSuccessor(*I, Orig->getSuccProbability(I));

diff  --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 9e67dcbf27d4c37..07eb0ba7f45c2e3 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -457,16 +457,17 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
 /// Allocate a new MachineBasicBlock. Use this instead of
 /// `new MachineBasicBlock'.
 MachineBasicBlock *
-MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *BB,
+                                         std::optional<UniqueBBID> BBID) {
   MachineBasicBlock *MBB =
       new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
-          MachineBasicBlock(*this, bb);
+          MachineBasicBlock(*this, BB);
   // Set BBID for `-basic-block=sections=labels` and
   // `-basic-block-sections=list` to allow robust mapping of profiles to basic
   // blocks.
   if (Target.getBBSectionsType() == BasicBlockSection::Labels ||
       Target.getBBSectionsType() == BasicBlockSection::List)
-    MBB->setBBID(NextBBID++);
+    MBB->setBBID(BBID.has_value() ? *BBID : UniqueBBID{NextBBID++, 0});
   return MBB;
 }
 

diff  --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index bf1605f06bd88d6..fe7efb73a2dce83 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -430,10 +431,18 @@ bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
   return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
-MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
-    MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const {
-  assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated");
+MachineInstr &
+TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator InsertBefore,
+                           const MachineInstr &Orig) const {
   MachineFunction &MF = *MBB.getParent();
+  // CFI instructions are marked as non-duplicable, because Darwin compact
+  // unwind info emission can't handle multiple prologue setups.
+  assert((!Orig.isNotDuplicable() ||
+          (!MF.getTarget().getTargetTriple().isOSDarwin() &&
+           Orig.isCFIInstruction())) &&
+         "Instruction cannot be duplicated");
+
   return MF.cloneMachineInstrBundle(MBB, InsertBefore, Orig);
 }
 

diff  --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index e6ecbc9b03f7149..1f7c949cd6031b1 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1267,6 +1267,7 @@ void TargetPassConfig::addMachinePasses() {
     if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
       addPass(llvm::createBasicBlockSectionsProfileReaderPass(
           TM->getBBSectionsFuncListBuf()));
+      addPass(llvm::createBasicBlockPathCloningPass());
     }
     addPass(llvm::createBasicBlockSectionsPass());
   } else if (TM->Options.EnableMachineFunctionSplitter ||

diff  --git a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir
index 74a7bcf3ae82f1a..f11707c719895da 100644
--- a/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir
+++ b/llvm/test/CodeGen/X86/basic-block-labels-mir-parse.mir
@@ -136,7 +136,7 @@ body:             |
   
     MOV32mi $rbp, 1, $noreg, -8, $noreg, 0 :: (store (s32) into %ir.2)
   
-  bb.3 (%ir-block.9, bb_id 3):
+  bb.3 (%ir-block.9, bb_id 3 2):
     renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load (s32) from %ir.2)
     $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp
     frame-destroy CFI_INSTRUCTION def_cfa $rsp, 8

diff  --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll
new file mode 100644
index 000000000000000..0f84b891a7c529d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-1.ll
@@ -0,0 +1,71 @@
+;; Test cloning a single path with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test a valid application of path cloning.
+; RUN: echo 'v1' > %t
+; RUN: echo 'f foo' >> %t
+; RUN: echo 'p 0 3 5' >> %t
+; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b3
+
+b1:                                           ; preds = %b0
+  call void @effect(i32 1)
+  br i1 %b, label %b2, label %b3
+
+b2:                                             ; preds = %b1
+  call void @effect(i32 2)
+  br label %b3
+
+b3:                                            ; preds = %b0, %b1, %b2
+  call void @effect(i32 3)
+  br i1 %c, label %b4, label %b5
+
+b4:                                             ; preds = %b3
+  call void @effect(i32 4)
+  br i1 %d, label %b5, label %cold
+
+b5:                                            ; preds = %b3, %b4
+  call void @effect(i32 5)
+  ret void
+cold:
+  call void @effect(i32 6)                     ; preds = %b4
+  ret void
+}
+
+;; Check the cloned block ids in MIR.
+
+; MIR: bb.7.b3 (bb_id 3 1):
+; MIR: bb.8.b5 (bb_id 5 1):
+
+;; Check the final layout and branches.
+
+;; bb section:
+; CHECK:        .section    .text.foo,"ax",@progbits
+; CHECK:      foo:
+; CHECK:      # %bb.0:        # %b0
+; CHECK:        jne .LBB0_1
+; CHECK-NEXT: # %bb.7:        # %b3
+; CHECK:        jne .LBB0_4
+; CHECK-NEXT: # %bb.8:        # %b5
+; CHECK:        retq
+; CHECK-NEXT: .LBB0_1:        # %b1
+; CHECK:        je .LBB0_3
+; CHECK-NEXT: # %bb.2:        # %b2
+; CHECK:        callq effect@PLT
+; CHECK-NEXT: .LBB0_3:        # %b3
+; CHECK:        je .LBB0_5
+; CHECK-NEXT: .LBB0_4:        # %b4
+; CHECK:        je foo.cold
+; CHECK-NEXT: .LBB0_5:        # %b5
+; CHECK:        retq
+
+;; split section
+; CHECK:        .section    .text.split.foo,"ax",@progbits
+; CHECK:      foo.cold:      # %cold

diff  --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll
new file mode 100644
index 000000000000000..c433491a49430a6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-2.ll
@@ -0,0 +1,86 @@
+;; Test cloning two paths with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+; RUN: echo 'v1' > %t
+; RUN: echo 'f foo' >> %t
+; RUN: echo 'p 0 3 5' >> %t
+; RUN: echo 'p 1 3 4 5' >> %t
+; RUN: echo 'c 0 3.1 5.1' >> %t
+; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t -stop-after=bb-path-cloning | FileCheck %s --check-prefix=MIR
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b3
+
+b1:                                           ; preds = %b0
+  call void @effect(i32 1)
+  br i1 %b, label %b2, label %b3
+
+b2:                                             ; preds = %b1
+  call void @effect(i32 2)
+  br label %b3
+
+b3:                                            ; preds = %b0, %b1, %b2
+  call void @effect(i32 3)
+  br i1 %c, label %b4, label %b5
+
+b4:                                             ; preds = %b3
+  call void @effect(i32 4)
+  br i1 %d, label %b5, label %cold
+
+b5:                                            ; preds = %b3, %b4
+  call void @effect(i32 5)
+  ret void
+cold:
+  call void @effect(i32 6)                     ; preds = %b4
+  ret void
+}
+
+;; Check the cloned block ids in MIR.
+
+; MIR:   bb.7.b3 (bb_id 3 1):
+; MIR:   bb.8.b5 (bb_id 5 1):
+; MIR:   bb.9.b3 (bb_id 3 2):
+; MIR:   bb.10.b4 (bb_id 4 1):
+; MIR:   bb.11.b5 (bb_id 5 2):
+
+;; Check the final layout and branches.
+
+;; first cluster:
+; CHECK:        .section    .text.foo,"ax",@progbits
+; CHECK:      foo:
+; CHECK:      # %bb.0:        # %b0
+; CHECK:        jne foo.__part.1
+; CHECK-NEXT: # %bb.7:        # %b3
+; CHECK:        jne .LBB0_4
+; CHECK-NEXT: # %bb.8:        # %b5
+; CHECK:        retq
+
+;; second cluster:
+; CHECK:        .section    .text.foo,"ax",@progbits,unique,1
+; CHECK-NEXT: foo.__part.1:   # %b1
+; CHECK:        jne .LBB0_2
+; CHECK-NEXT: # %bb.9:        # %b3
+; CHECK:        je .LBB0_5
+; CHECK-NEXT: # %bb.10:       # %b4
+; CHECK:        je foo.cold
+; CHECK-NEXT: # %bb.11:       # %b5
+; CHECK:        retq
+; CHECK-NEXT: .LBB0_2:        # %b2
+; CHECK:        callq	effect@PLT
+; CHECK-NEXT: # %bb.3:        # %b3
+; CHECK:        je .LBB0_5
+; CHECK-NEXT: .LBB0_4:        # %b4
+; CHECK:        je foo.cold
+; CHECK-NEXT: .LBB0_5:       # %b5
+; CHECK:        retq
+
+;; split section
+; CHECK:        .section    .text.split.foo,"ax",@progbits
+; CHECK:      foo.cold:      # %cold
+
+

diff  --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll
new file mode 100644
index 000000000000000..d8686cdfa098e06
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect-invalid.ll
@@ -0,0 +1,45 @@
+;; Tests for invalid path cloning with -basic-block-sections involving indirect branches.
+
+declare void @effect(i32 zeroext)
+
+;; Test failed application of path cloning for paths with indirect branches.
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f bar' >> %t1
+; RUN: echo 'p 0 1 2' >> %t1
+; RUN: echo 'c 0 1.1 2.1 1' >> %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s
+; RUN: FileCheck %s --check-prefix=WARN < %t1.err
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f bar' >> %t2
+; RUN: echo 'p 1 2' >> %t2
+; RUN: echo 'c 0 1 2.1' >> %t2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s
+; RUN: FileCheck %s --check-prefix=WARN < %t2.err
+
+
+define void @bar(i1 %a, i1 %b) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b2
+b1:                                              ; preds = %b0
+  call void @effect(i32 1)
+  %0 = select i1 %b,                           ; <ptr> [#uses=1]
+              ptr blockaddress(@bar, %b2),
+              ptr blockaddress(@bar, %b3)
+  indirectbr ptr %0, [label %b2, label %b3]
+b2:                                              ; preds = %b0, %b1
+  call void @effect(i32 2)
+  ret void
+b3:
+  call void @effect(i32 3)                       ; preds = %b1
+  ret void
+}
+
+; CHECK:   .section    .text.bar,"ax",@progbits
+; CHECK:   bar:
+; CHECK: # %bb.0:        # %b0
+; CHECK: # %bb.1:        # %b1
+; CHECK:   .section    .text.split.bar,"ax",@progbits
+; CHECK: bar.cold:       # %b2   
+
+; WARN: warning: block #1 has indirect branch and appears as the non-tail block of a path in function bar

diff  --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll
new file mode 100644
index 000000000000000..3d9a8d36ca10569
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-indirect.ll
@@ -0,0 +1,43 @@
+;; Test for cloning a path ending with indirect branch with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test valid application of cloning for a path with indirect branch.
+; RUN: echo 'v1' > %t
+; RUN: echo 'f bar' >> %t
+; RUN: echo 'p 0 1' >> %t
+; RUN: echo 'c 0 1.1 2 1' >> %t
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t | FileCheck %s
+
+define void @bar(i1 %a, i1 %b) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b2
+b1:                                              ; preds = %b0
+  call void @effect(i32 1)
+  %0 = select i1 %b,                           ; <ptr> [#uses=1]
+              ptr blockaddress(@bar, %b2),
+              ptr blockaddress(@bar, %b3)
+  indirectbr ptr %0, [label %b2, label %b3]
+b2:                                              ; preds = %b0, %b1
+  call void @effect(i32 2)
+  ret void
+b3:
+  call void @effect(i32 3)                       ; preds = %b1
+  ret void
+}
+
+; CHECK:        .section    .text.bar,"ax",@progbits
+; CHECK:      bar:
+; CHECK:      # %bb.0:        # %b0
+; CHECK:        je .LBB0_2
+; CHECK-NEXT: # %bb.4:        # %b1
+; CHECK:        jmpq *%rax
+; CHECK-NEXT: .Ltmp0:         # Block address taken
+; CHECK-NEXT: .LBB0_2:        # %b2
+; CHECK:        retq
+; CHECK-NEXT: # %bb.1:        # %b1
+; CHECK:        jmpq *%rax
+; CHECK:        .section    .text.split.bar,"ax",@progbits
+; CHECK:      bar.cold:       # %b3
+

diff  --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll
new file mode 100644
index 000000000000000..521ec43ef050aa0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning-invalid.ll
@@ -0,0 +1,72 @@
+;; Tests for invalid or (partially invalid) path clonings with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test failed application of path cloning.
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f foo' >> %t1
+; RUN: echo 'p 0 2 3' >> %t1
+; RUN: echo 'c 0 2.1 3.1 1' >> %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 2> %t1.err | FileCheck %s
+; RUN: FileCheck %s --check-prefixes=WARN1 < %t1.err
+;; Test that valid clonings are applied correctly, even if invalid clonings exist.
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f foo' >> %t2
+; RUN: echo 'p 0 2 3' >> %t2
+; RUN: echo 'p 0 1 3' >> %t2
+; RUN: echo 'c 0 1.1 3.2 2.1 3.1 1' >> %t2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 2> %t2.err | FileCheck %s --check-prefixes=PATH
+; RUN: FileCheck %s --check-prefixes=WARN1 < %t2.err
+; RUN: echo 'v1' > %t3
+; RUN: echo 'f foo' >> %t3
+; RUN: echo 'p 0 100' >> %t3
+; RUN: echo 'c 0 100.1 1' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s
+; RUN: FileCheck %s --check-prefixes=WARN2 < %t3.err
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b3
+
+b1:                                           ; preds = %b0
+  call void @effect(i32 1)
+  br i1 %b, label %b2, label %b3
+
+b2:                                             ; preds = %b1
+  call void @effect(i32 2)
+  br label %b3
+
+b3:                                            ; preds = %b0, %b1, %b2
+  call void @effect(i32 3)
+  br i1 %c, label %b4, label %b5
+
+b4:                                             ; preds = %b3
+  call void @effect(i32 4)
+  br i1 %d, label %b5, label %cold
+
+b5:                                            ; preds = %b3, %b4
+  call void @effect(i32 5)
+  ret void
+cold:
+  call void @effect(i32 6)                     ; preds = %b4
+  ret void
+}
+
+; CHECK:   .section    .text.foo,"ax",@progbits
+; CHECK: foo:
+; CHECK: # %bb.0:        # %b0
+
+; CHECK:   je .LBB0_3
+; PATH:  # %bb.7:      # %b1
+; PATH:  # %bb.8:      # %b3
+; PATH:    jne .LBB0_4
+; CHECK: # %bb.1:      # %b1
+; CHECK:   jne foo.cold
+
+; CHECK: foo.cold:      # %b2
+
+;; Check the warnings
+; WARN1: warning: block #2 is not a successor of block #0 in function foo
+; WARN2: warning: no block with id 100 in function foo
+


        


More information about the llvm-commits mailing list