[llvm] [BasicBlockSections] Apply path cloning with -basic-block-sections. (PR #68860)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 16 09:20:59 PDT 2023


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/68860

>From f206269bb1e8c36277e8cfe99c36fa4e44af433b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 12 Oct 2023 05:52:25 +0000
Subject: [PATCH] [BasicBlockSections] Apply path cloning with
 -basic-block-sections.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |   1 +
 llvm/lib/CodeGen/BasicBlockSections.cpp       | 173 +++++++++++++++++-
 .../BasicBlockSectionsProfileReader.cpp       |   4 +
 .../X86/basic-block-sections-cloning.ll       | 169 +++++++++++++++++
 4 files changed, 337 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-cloning.ll

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 6e01dfd11ee6dad..e4cb7e5b6643d0a 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
 
 namespace llvm {
 
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 632fd68d88b5c64..7913f069b0f1d72 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -77,8 +77,11 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Target/TargetMachine.h"
 #include <optional>
+#include <sstream>
 
 using namespace llvm;
 
@@ -99,6 +102,33 @@ static cl::opt<bool> BBSectionsDetectSourceDrift(
 
 namespace {
 
+MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock *MBB) {
+  auto &MF = *MBB->getParent();
+  auto TII = MF.getSubtarget().getInstrInfo();
+
+  auto CloneBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+  MF.push_back(CloneBB);
+  // Copy the instructions.
+  for (auto &I : MBB->instrs())
+    CloneBB->push_back(MF.CloneMachineInstr(&I));
+
+  // Add the successors of the original block as the new block's successors.
+  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
+    CloneBB->copySuccessor(MBB, SI);
+
+  if (auto FT = MBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+    // The original block has an implicit fall through.
+    // Insert an explicit unconditional jump from the cloned block to the
+    // fallthrough block.
+    TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc());
+  }
+
+  for (auto &LiveIn : MBB->liveins())
+    CloneBB->addLiveIn(LiveIn);
+
+  return CloneBB;
+}
+
 class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
@@ -285,6 +315,128 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
   return false;
 }
 
+// Returns `Error::success` if we can legally apply all clonings specified in
+// `ClonePaths`, and `Error` otherwise.
+static Error
+CheckValidCloning(const MachineFunction &MF,
+                  const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+  // Map from the final BB IDs to the `MachineBasicBlock`s.
+  DenseMap<unsigned, const MachineBasicBlock *> BBIDToBlock;
+  for (auto &BB : MF)
+    BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+  for (auto &ClonePath : ClonePaths) {
+    const MachineBasicBlock *PrevBB = nullptr;
+    for (size_t I = 0; I < ClonePath.size(); ++I) {
+      unsigned BBID = ClonePath[I];
+      const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID);
+      if (!PathBB) {
+        return make_error<StringError>(Twine("no block with id ") +
+                                           Twine(BBID) + " in function " +
+                                           MF.getName(),
+                                       inconvertibleErrorCode());
+      }
+
+      if (PrevBB && !PrevBB->isSuccessor(PathBB)) {
+        return make_error<StringError>(
+            Twine("block #") + Twine(BBID) + " is not a successor of block #" +
+                Twine(*PrevBB->getBBID()) + " in function " + MF.getName(),
+            inconvertibleErrorCode());
+      }
+
+      if (I != ClonePath.size() - 1 && !PathBB->empty() &&
+          PathBB->back().isIndirectBranch()) {
+        return make_error<StringError>(
+            Twine("block #") + Twine(BBID) +
+                " has indirect branch can only appear as the last block of the "
+                "path, in function " +
+                MF.getName(),
+            inconvertibleErrorCode());
+      }
+      PrevBB = PathBB;
+    }
+  }
+  return Error::success();
+}
+
+// Applies all clonings specified in `ClonePaths` to `MF` and returns a map
+// from `ProfileBBID`s of all clone blocks to their BBIDs (assigned by
+// `MachineFunction`).
+static DenseMap<ProfileBBID, unsigned>
+ApplyCloning(MachineFunction &MF,
+             const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+  DenseMap<ProfileBBID, unsigned> CloneBBIDMap;
+  // Map from the final BB IDs to the `MachineBasicBlock`s.
+  DenseMap<unsigned, MachineBasicBlock *> BBIDToBlock;
+  for (auto &BB : MF)
+    BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+  DenseMap<unsigned, unsigned> NClonesForBBID;
+  auto TII = MF.getSubtarget().getInstrInfo();
+  for (const auto &ClonePath : ClonePaths) {
+
+    MachineBasicBlock *PrevBB = nullptr;
+    for (unsigned BBID : ClonePath) {
+      MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID);
+      if (PrevBB == nullptr) {
+        if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+          // Make fallthroughs explicit since we may change the layout.
+          TII->insertUnconditionalBranch(*OrigBB, FT,
+                                         OrigBB->findBranchDebugLoc());
+        }
+        PrevBB = OrigBB;
+        continue;
+      }
+
+      MachineBasicBlock *CloneBB = CloneMachineBasicBlock(OrigBB);
+
+      CloneBBIDMap.try_emplace({BBID, ++NClonesForBBID[BBID]},
+                               *CloneBB->getBBID());
+      // Set up the previous block in the path to jump to the clone.
+      PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB);
+      PrevBB = CloneBB;
+    }
+  }
+  return CloneBBIDMap;
+}
+
+// Processes the raw input profile `PathAndClusterInfo` for the given function
+// `MF` and returns the final profile (cluster information). Returns error if
+// `PathAndClusterInfo` is invalid and cannot be applied (e.g., invalid cloning
+// paths).
+//
+// Steps:
+//    1. Clones basic blocks based on `PathAndClusterInfo.ClonePaths` (if not
+//    empty) and
+//       updates the CFG accordingly.
+//    2. Creates and returns the cluster profile for the final blocks (original
+//       and cloned) based on `PathAndClusterInfo.ClusterInfo`.
+static Expected<DenseMap<unsigned, BBClusterInfo<unsigned>>>
+ProcessProfile(MachineFunction &MF,
+               const FunctionPathAndClusterInfo &PathAndClusterInfo) {
+  if (auto Err = CheckValidCloning(MF, PathAndClusterInfo.ClonePaths))
+    return std::move(Err);
+
+  // Apply the clonings and obtain the map from the input block ID of cloned
+  // blocks to their final BB IDs.
+  DenseMap<ProfileBBID, unsigned> CloneBBIDMap =
+      ApplyCloning(MF, PathAndClusterInfo.ClonePaths);
+
+  // Map from final BB IDs to their profile information.
+  DenseMap<unsigned, BBClusterInfo<unsigned>> ClusterInfoByBBID;
+  // This step creates all the necessary clones. It does not adjust the
+  // branches.
+  for (const BBClusterInfo<ProfileBBID> &P : PathAndClusterInfo.ClusterInfo) {
+    unsigned FinalBBID = P.BasicBlockID.CloneID == 0
+                             ? P.BasicBlockID.BBID
+                             : CloneBBIDMap.at(P.BasicBlockID);
+    ClusterInfoByBBID.try_emplace(
+        FinalBBID,
+        BBClusterInfo<unsigned>{FinalBBID, P.ClusterID, P.PositionInCluster});
+  }
+  return ClusterInfoByBBID;
+}
+
 bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
   auto BBSectionsType = MF.getTarget().getBBSectionsType();
   assert(BBSectionsType != BasicBlockSection::None &&
@@ -315,17 +467,18 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
             .getPathAndClusterInfoForFunction(MF.getName());
     if (!HasProfile)
       return true;
-    for (const BBClusterInfo<ProfileBBID> &BBP :
-         PathAndClusterInfo.ClusterInfo) {
-      // TODO: Apply the path cloning profile.
-      assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet");
-      const auto [I, Inserted] = ClusterInfoByBBID.try_emplace(
-          BBP.BasicBlockID.BBID,
-          BBClusterInfo<unsigned>{BBP.BasicBlockID.BBID, BBP.ClusterID,
-                                  BBP.PositionInCluster});
-      (void)I;
-      assert(Inserted && "Duplicate BBID found in profile");
+    auto ClusterInfoOrErr = ProcessProfile(MF, PathAndClusterInfo);
+    if (!ClusterInfoOrErr) {
+      auto Err =
+          handleErrors(ClusterInfoOrErr.takeError(), [&](const StringError &E) {
+            WithColor::warning()
+                << "Rejecting the BBSections profile for function "
+                << MF.getName() << " : " << E.getMessage() << "\n";
+          });
+      assert(!Err);
+      return true;
     }
+    ClusterInfoByBBID = *std::move(ClusterInfoOrErr);
   }
 
   MF.setBBSectionsType(BBSectionsType);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 6bb412a6c7534a6..650c2770ae3bee1 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -217,6 +217,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       CurrentCluster++;
       continue;
     case 'p': { // Basic block cloning path specifier.
+      // Skip the profile when we the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
       SmallSet<unsigned, 5> BBsInPath;
       FI->second.ClonePaths.push_back({});
       for (size_t I = 0; I < Values.size(); ++I) {
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
new file mode 100644
index 000000000000000..0cca18b5aa8ad3a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
@@ -0,0 +1,169 @@
+;; Tests for path cloning with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test valid application of path cloning.
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f foo' >> %t1
+; RUN: echo 'p 0 3 5' >> %t1
+; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 | FileCheck %s --check-prefixes=PATH1,FOOSECTIONS
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f foo' >> %t2
+; RUN: echo 'p 0 3 5' >> %t2
+; RUN: echo 'p 1 3 4 5' >> %t2
+; RUN: echo 'c 0 3.1 5.1' >> %t2
+; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 | FileCheck %s --check-prefixes=PATH2,FOOSECTIONS
+
+;; Test failed application of path cloning.
+; RUN: echo 'v1' > %t3
+; RUN: echo 'f foo' >> %t3
+; RUN: echo 'p 0 2 3' >> %t3
+; RUN: echo 'c 0 2.1 3.1' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH3-WARN < %t3.err
+; RUN: echo 'v1' > %t4
+; RUN: echo 'f foo' >> %t4
+; RUN: echo 'p 0 1 100' >> %t4
+; RUN: echo 'c 0' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t4 2> %t4.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH4-WARN < %t4.err
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b3
+
+b1:                                           ; preds = %b0
+  call void @effect(i32 1)
+  br i1 %b, label %b2, label %b3
+
+b2:                                             ; preds = %b1
+  call void @effect(i32 2)
+  br label %b3
+
+b3:                                            ; preds = %b0, %b1, %b2
+  call void @effect(i32 3)
+  br i1 %c, label %b4, label %b5
+
+b4:                                             ; preds = %b3
+  call void @effect(i32 4)
+  br i1 %d, label %b5, label %cold
+
+b5:                                            ; preds = %b3, %b4
+  call void @effect(i32 5)
+  ret void
+cold:
+  call void @effect(i32 6)                     ; preds = %b4
+  ret void
+}
+
+; PATH1:         .section    .text.foo,"ax", at progbits
+; PATH1-LABEL: foo:
+; PATH1:       # %bb.0:        # %b0
+; PATH1:         jne .LBB0_1
+; PATH1-NEXT:  # %bb.7:        # %b3
+; PATH1:         jne .LBB0_4
+; PATH1-NEXT:  # %bb.8:        # %b5
+; PATH1:        retq
+; PATH1-NEXT:  .LBB0_1:        # %b1
+; PATH1:         je .LBB0_3
+; PATH1-NEXT:  # %bb.2:        # %b2
+; PATH1:         callq effect at PLT
+; PATH1-NEXT:  .LBB0_3:        # %b3
+; PATH1:         je .LBB0_5
+; PATH1-NEXT:  .LBB0_4:        # %b4
+; PATH1:         je foo.cold
+; PATH1-NEXT:  .LBB0_5:        # %b5
+; PATH1:         retq
+
+; PATH2:         .section    .text.foo,"ax", at progbits
+; PATH2-LABEL: foo:
+; PATH2:       # %bb.0:        # %b0
+; PATH2:         jne foo.__part.1
+; PATH2-NEXT:  # %bb.7:        # %b3
+; PATH2:         jne .LBB0_4
+; PATH2-NEXT:  # %bb.8:        # %b5
+; PATH2:        retq
+; PATH2:         .section    .text.foo,"ax", at progbits,unique,1
+; PATH2-NEXT:  foo.__part.1:   # %b1
+; PATH2:         jne .LBB0_2
+; PATH2-NEXT:  # %bb.9:        # %b3
+; PATH2:         je .LBB0_5
+; PATH2-NEXT:  # %bb.10:       # %b4
+; PATH2:         je foo.cold
+; PATH2-NEXT:  # %bb.11:       # %b5
+; PATH2:         retq
+; PATH2-NEXT:  .LBB0_2:        # %b2
+; PATH2:         callq	effect at PLT
+; PATH2-NEXT:  # %bb.3:        # %b3
+; PATH2:         je .LBB0_5
+; PATH2-NEXT:  .LBB0_4:        # %b4
+; PATH2:          je foo.cold
+; PATH2-NEXT:  .LBB0_5:       # %b5
+; PATH2:          retq
+; FOOSECTIONS: foo.cold
+
+; PATH3-WARN: warning: Rejecting the BBSections profile for function foo : block #2 is not a successor of block #0 in function foo
+; PATH4-WARN: warning: Rejecting the BBSections profile for function foo : no block with id 100 in function foo
+; NOSECTIONS-NOT: foo.cold
+
+
+;; Test valid application of cloning for paths with indirect branches.
+; RUN: echo 'v1' > %t5
+; RUN: echo 'f bar' >> %t5
+; RUN: echo 'p 0 1' >> %t5
+; RUN: echo 'c 0 1.1 2 1' >> %t5
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t5 | FileCheck %s --check-prefix=PATH5
+; RUN: echo 'v1' > %t6
+; RUN: echo 'f bar' >> %t6
+; RUN: echo 'p 0 1 2' >> %t6
+; RUN: echo 'c 0 1.1 2.1 1' >> %t6
+
+;; Test failed application of path cloning for paths with indirect branches.
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t6 2> %t6.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t6.err
+; RUN: echo 'v1' > %t7
+; RUN: echo 'f bar' >> %t7
+; RUN: echo 'p 1 2' >> %t7
+; RUN: echo 'c 0 1 2.1' >> %t7
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t7 2> %t7.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t7.err
+
+
+define void @bar(i1 %a, i1 %b) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b2
+b1:                                              ; preds = %b0
+  call void @effect(i32 1)
+  %0 = select i1 %b,                           ; <ptr> [#uses=1]
+              ptr blockaddress(@bar, %b2),
+              ptr blockaddress(@bar, %b3)
+  indirectbr ptr %0, [label %b2, label %b3]
+b2:                                              ; preds = %b0, %b1
+  call void @effect(i32 2)
+  ret void
+b3:
+  call void @effect(i32 3)                       ; preds = %b1
+  ret void
+}
+
+
+; PATH5:         .section    .text.bar,"ax", at progbits
+; PATH5-LABEL: bar:
+; PATH5:       # %bb.0:        # %b0
+; PATH5:         je .LBB1_2
+; PATH5-NEXT:  # %bb.4:        # %b1
+; PATH5:         jmpq *%rax
+; PATH5-NEXT:  .Ltmp0:         # Block address taken
+; PATH5-NEXT:  .LBB1_2:        # %b2
+; PATH5:         retq
+; PATH5-NEXT:  # %bb.1:        # %b1
+; PATH5:         jmpq *%rax
+; PATH5:         .section    .text.split.bar,"ax", at progbits
+; PATH5:       bar.cold:       # %b3
+; NOSECTIONS-NOT: bar.cold
+
+; PATH-INDIR-WARN: warning: Rejecting the BBSections profile for function bar : block #1 has indirect branch can only appear as the last block of the path, in function bar



More information about the llvm-commits mailing list