[llvm] [BasicBlockSections] Apply path cloning with -basic-block-sections. (PR #68860)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 12 00:28:06 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Rahman Lavaee (rlavaee)

<details>
<summary>Changes</summary>

https://github.com/llvm/llvm-project/commit/28b912687900bc0a67cd61c374fce296b09963c4 introduced the path cloning format in the basic-block-sections profile.

This PR validates and applies path clonings. 
A path cloning is valid if all of these conditions hold:
  1. All bb ids in the path are mapped to existing blocks.
  2. Each two consecutive bb ids in the path have a successor relationship in the CFG.
  3. The path does not include a block with indirect branches, except possibly as the last block.
 
Applying a path cloning involves cloning all blocks in the path (except the first one) and setting up their branches.
Once all clonings are applied, the cluster information is used to guide block layout in the modified function.

---
Full diff: https://github.com/llvm/llvm-project/pull/68860.diff


4 Files Affected:

- (modified) .gitignore (+1) 
- (modified) llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h (+1) 
- (modified) llvm/lib/CodeGen/BasicBlockSections.cpp (+163-10) 
- (added) llvm/test/CodeGen/X86/basic-block-sections-cloning.ll (+169) 


``````````diff
diff --git a/.gitignore b/.gitignore
index 20c4f52cd37860e..bd4f8f12cc23916 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,4 @@ pythonenv*
 /clang/utils/analyzer/projects/*/RefScanBuildResults
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
+plo/
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 6e01dfd11ee6dad..e4cb7e5b6643d0a 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -25,6 +25,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
+using namespace llvm;
 
 namespace llvm {
 
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 632fd68d88b5c64..7913f069b0f1d72 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -77,8 +77,11 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Target/TargetMachine.h"
 #include <optional>
+#include <sstream>
 
 using namespace llvm;
 
@@ -99,6 +102,33 @@ static cl::opt<bool> BBSectionsDetectSourceDrift(
 
 namespace {
 
+MachineBasicBlock *CloneMachineBasicBlock(MachineBasicBlock *MBB) {
+  auto &MF = *MBB->getParent();
+  auto TII = MF.getSubtarget().getInstrInfo();
+
+  auto CloneBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+  MF.push_back(CloneBB);
+  // Copy the instructions.
+  for (auto &I : MBB->instrs())
+    CloneBB->push_back(MF.CloneMachineInstr(&I));
+
+  // Add the successors of the original block as the new block's successors.
+  for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
+    CloneBB->copySuccessor(MBB, SI);
+
+  if (auto FT = MBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+    // The original block has an implicit fall through.
+    // Insert an explicit unconditional jump from the cloned block to the
+    // fallthrough block.
+    TII->insertUnconditionalBranch(*CloneBB, FT, CloneBB->findBranchDebugLoc());
+  }
+
+  for (auto &LiveIn : MBB->liveins())
+    CloneBB->addLiveIn(LiveIn);
+
+  return CloneBB;
+}
+
 class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
@@ -285,6 +315,128 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
   return false;
 }
 
+// Returns `Error::success` if we can legally apply all clonings specified in
+// `ClonePaths`, and `Error` otherwise.
+static Error
+CheckValidCloning(const MachineFunction &MF,
+                  const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+  // Map from the final BB IDs to the `MachineBasicBlock`s.
+  DenseMap<unsigned, const MachineBasicBlock *> BBIDToBlock;
+  for (auto &BB : MF)
+    BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+  for (auto &ClonePath : ClonePaths) {
+    const MachineBasicBlock *PrevBB = nullptr;
+    for (size_t I = 0; I < ClonePath.size(); ++I) {
+      unsigned BBID = ClonePath[I];
+      const MachineBasicBlock *PathBB = BBIDToBlock.lookup(BBID);
+      if (!PathBB) {
+        return make_error<StringError>(Twine("no block with id ") +
+                                           Twine(BBID) + " in function " +
+                                           MF.getName(),
+                                       inconvertibleErrorCode());
+      }
+
+      if (PrevBB && !PrevBB->isSuccessor(PathBB)) {
+        return make_error<StringError>(
+            Twine("block #") + Twine(BBID) + " is not a successor of block #" +
+                Twine(*PrevBB->getBBID()) + " in function " + MF.getName(),
+            inconvertibleErrorCode());
+      }
+
+      if (I != ClonePath.size() - 1 && !PathBB->empty() &&
+          PathBB->back().isIndirectBranch()) {
+        return make_error<StringError>(
+            Twine("block #") + Twine(BBID) +
+                " has indirect branch can only appear as the last block of the "
+                "path, in function " +
+                MF.getName(),
+            inconvertibleErrorCode());
+      }
+      PrevBB = PathBB;
+    }
+  }
+  return Error::success();
+}
+
+// Applies all clonings specified in `ClonePaths` to `MF` and returns a map
+// from `ProfileBBID`s of all clone blocks to their BBIDs (assigned by
+// `MachineFunction`).
+static DenseMap<ProfileBBID, unsigned>
+ApplyCloning(MachineFunction &MF,
+             const SmallVector<SmallVector<unsigned>> &ClonePaths) {
+  DenseMap<ProfileBBID, unsigned> CloneBBIDMap;
+  // Map from the final BB IDs to the `MachineBasicBlock`s.
+  DenseMap<unsigned, MachineBasicBlock *> BBIDToBlock;
+  for (auto &BB : MF)
+    BBIDToBlock.try_emplace(*BB.getBBID(), &BB);
+
+  DenseMap<unsigned, unsigned> NClonesForBBID;
+  auto TII = MF.getSubtarget().getInstrInfo();
+  for (const auto &ClonePath : ClonePaths) {
+
+    MachineBasicBlock *PrevBB = nullptr;
+    for (unsigned BBID : ClonePath) {
+      MachineBasicBlock *OrigBB = BBIDToBlock.at(BBID);
+      if (PrevBB == nullptr) {
+        if (auto FT = OrigBB->getFallThrough(/*JumpToFallThrough=*/false)) {
+          // Make fallthroughs explicit since we may change the layout.
+          TII->insertUnconditionalBranch(*OrigBB, FT,
+                                         OrigBB->findBranchDebugLoc());
+        }
+        PrevBB = OrigBB;
+        continue;
+      }
+
+      MachineBasicBlock *CloneBB = CloneMachineBasicBlock(OrigBB);
+
+      CloneBBIDMap.try_emplace({BBID, ++NClonesForBBID[BBID]},
+                               *CloneBB->getBBID());
+      // Set up the previous block in the path to jump to the clone.
+      PrevBB->ReplaceUsesOfBlockWith(OrigBB, CloneBB);
+      PrevBB = CloneBB;
+    }
+  }
+  return CloneBBIDMap;
+}
+
+// Processes the raw input profile `PathAndClusterInfo` for the given function
+// `MF` and returns the final profile (cluster information). Returns error if
+// `PathAndClusterInfo` is invalid and cannot be applied (e.g., invalid cloning
+// paths).
+//
+// Steps:
+//    1. Clones basic blocks based on `PathAndClusterInfo.ClonePaths` (if not
+//    empty) and
+//       updates the CFG accordingly.
+//    2. Creates and returns the cluster profile for the final blocks (original
+//       and cloned) based on `PathAndClusterInfo.ClusterInfo`.
+static Expected<DenseMap<unsigned, BBClusterInfo<unsigned>>>
+ProcessProfile(MachineFunction &MF,
+               const FunctionPathAndClusterInfo &PathAndClusterInfo) {
+  if (auto Err = CheckValidCloning(MF, PathAndClusterInfo.ClonePaths))
+    return std::move(Err);
+
+  // Apply the clonings and obtain the map from the input block ID of cloned
+  // blocks to their final BB IDs.
+  DenseMap<ProfileBBID, unsigned> CloneBBIDMap =
+      ApplyCloning(MF, PathAndClusterInfo.ClonePaths);
+
+  // Map from final BB IDs to their profile information.
+  DenseMap<unsigned, BBClusterInfo<unsigned>> ClusterInfoByBBID;
+  // This step creates all the necessary clones. It does not adjust the
+  // branches.
+  for (const BBClusterInfo<ProfileBBID> &P : PathAndClusterInfo.ClusterInfo) {
+    unsigned FinalBBID = P.BasicBlockID.CloneID == 0
+                             ? P.BasicBlockID.BBID
+                             : CloneBBIDMap.at(P.BasicBlockID);
+    ClusterInfoByBBID.try_emplace(
+        FinalBBID,
+        BBClusterInfo<unsigned>{FinalBBID, P.ClusterID, P.PositionInCluster});
+  }
+  return ClusterInfoByBBID;
+}
+
 bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
   auto BBSectionsType = MF.getTarget().getBBSectionsType();
   assert(BBSectionsType != BasicBlockSection::None &&
@@ -315,17 +467,18 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
             .getPathAndClusterInfoForFunction(MF.getName());
     if (!HasProfile)
       return true;
-    for (const BBClusterInfo<ProfileBBID> &BBP :
-         PathAndClusterInfo.ClusterInfo) {
-      // TODO: Apply the path cloning profile.
-      assert(!BBP.BasicBlockID.CloneID && "Path cloning is not supported yet");
-      const auto [I, Inserted] = ClusterInfoByBBID.try_emplace(
-          BBP.BasicBlockID.BBID,
-          BBClusterInfo<unsigned>{BBP.BasicBlockID.BBID, BBP.ClusterID,
-                                  BBP.PositionInCluster});
-      (void)I;
-      assert(Inserted && "Duplicate BBID found in profile");
+    auto ClusterInfoOrErr = ProcessProfile(MF, PathAndClusterInfo);
+    if (!ClusterInfoOrErr) {
+      auto Err =
+          handleErrors(ClusterInfoOrErr.takeError(), [&](const StringError &E) {
+            WithColor::warning()
+                << "Rejecting the BBSections profile for function "
+                << MF.getName() << " : " << E.getMessage() << "\n";
+          });
+      assert(!Err);
+      return true;
     }
+    ClusterInfoByBBID = *std::move(ClusterInfoOrErr);
   }
 
   MF.setBBSectionsType(BBSectionsType);
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
new file mode 100644
index 000000000000000..0cca18b5aa8ad3a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/basic-block-sections-cloning.ll
@@ -0,0 +1,169 @@
+;; Tests for path cloning with -basic-block-sections.
+
+declare void @effect(i32 zeroext)
+
+;; Test valid application of path cloning.
+; RUN: echo 'v1' > %t1
+; RUN: echo 'f foo' >> %t1
+; RUN: echo 'p 0 3 5' >> %t1
+; RUN: echo 'c 0 3.1 5.1 1 2 3 4 5' >> %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t1 | FileCheck %s --check-prefixes=PATH1,FOOSECTIONS
+; RUN: echo 'v1' > %t2
+; RUN: echo 'f foo' >> %t2
+; RUN: echo 'p 0 3 5' >> %t2
+; RUN: echo 'p 1 3 4 5' >> %t2
+; RUN: echo 'c 0 3.1 5.1' >> %t2
+; RUN: echo 'c 1 3.2 4.1 5.2 2 3 4 5' >> %t2
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t2 | FileCheck %s --check-prefixes=PATH2,FOOSECTIONS
+
+;; Test failed application of path cloning.
+; RUN: echo 'v1' > %t3
+; RUN: echo 'f foo' >> %t3
+; RUN: echo 'p 0 2 3' >> %t3
+; RUN: echo 'c 0 2.1 3.1' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t3 2> %t3.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH3-WARN < %t3.err
+; RUN: echo 'v1' > %t4
+; RUN: echo 'f foo' >> %t4
+; RUN: echo 'p 0 1 100' >> %t4
+; RUN: echo 'c 0' >> %t3
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t4 2> %t4.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH4-WARN < %t4.err
+
+define void @foo(i1 %a, i1 %b, i1 %c, i1 %d) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b3
+
+b1:                                           ; preds = %b0
+  call void @effect(i32 1)
+  br i1 %b, label %b2, label %b3
+
+b2:                                             ; preds = %b1
+  call void @effect(i32 2)
+  br label %b3
+
+b3:                                            ; preds = %b0, %b1, %b2
+  call void @effect(i32 3)
+  br i1 %c, label %b4, label %b5
+
+b4:                                             ; preds = %b3
+  call void @effect(i32 4)
+  br i1 %d, label %b5, label %cold
+
+b5:                                            ; preds = %b3, %b4
+  call void @effect(i32 5)
+  ret void
+cold:
+  call void @effect(i32 6)                     ; preds = %b4
+  ret void
+}
+
+; PATH1:         .section    .text.foo,"ax", at progbits
+; PATH1-LABEL: foo:
+; PATH1:       # %bb.0:        # %b0
+; PATH1:         jne .LBB0_1
+; PATH1-NEXT:  # %bb.7:        # %b3
+; PATH1:         jne .LBB0_4
+; PATH1-NEXT:  # %bb.8:        # %b5
+; PATH1:        retq
+; PATH1-NEXT:  .LBB0_1:        # %b1
+; PATH1:         je .LBB0_3
+; PATH1-NEXT:  # %bb.2:        # %b2
+; PATH1:         callq effect at PLT
+; PATH1-NEXT:  .LBB0_3:        # %b3
+; PATH1:         je .LBB0_5
+; PATH1-NEXT:  .LBB0_4:        # %b4
+; PATH1:         je foo.cold
+; PATH1-NEXT:  .LBB0_5:        # %b5
+; PATH1:         retq
+
+; PATH2:         .section    .text.foo,"ax", at progbits
+; PATH2-LABEL: foo:
+; PATH2:       # %bb.0:        # %b0
+; PATH2:         jne foo.__part.1
+; PATH2-NEXT:  # %bb.7:        # %b3
+; PATH2:         jne .LBB0_4
+; PATH2-NEXT:  # %bb.8:        # %b5
+; PATH2:        retq
+; PATH2:         .section    .text.foo,"ax", at progbits,unique,1
+; PATH2-NEXT:  foo.__part.1:   # %b1
+; PATH2:         jne .LBB0_2
+; PATH2-NEXT:  # %bb.9:        # %b3
+; PATH2:         je .LBB0_5
+; PATH2-NEXT:  # %bb.10:       # %b4
+; PATH2:         je foo.cold
+; PATH2-NEXT:  # %bb.11:       # %b5
+; PATH2:         retq
+; PATH2-NEXT:  .LBB0_2:        # %b2
+; PATH2:         callq	effect at PLT
+; PATH2-NEXT:  # %bb.3:        # %b3
+; PATH2:         je .LBB0_5
+; PATH2-NEXT:  .LBB0_4:        # %b4
+; PATH2:          je foo.cold
+; PATH2-NEXT:  .LBB0_5:       # %b5
+; PATH2:          retq
+; FOOSECTIONS: foo.cold
+
+; PATH3-WARN: warning: Rejecting the BBSections profile for function foo : block #2 is not a successor of block #0 in function foo
+; PATH4-WARN: warning: Rejecting the BBSections profile for function foo : no block with id 100 in function foo
+; NOSECTIONS-NOT: foo.cold
+
+
+;; Test valid application of cloning for paths with indirect branches.
+; RUN: echo 'v1' > %t5
+; RUN: echo 'f bar' >> %t5
+; RUN: echo 'p 0 1' >> %t5
+; RUN: echo 'c 0 1.1 2 1' >> %t5
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t5 | FileCheck %s --check-prefix=PATH5
+; RUN: echo 'v1' > %t6
+; RUN: echo 'f bar' >> %t6
+; RUN: echo 'p 0 1 2' >> %t6
+; RUN: echo 'c 0 1.1 2.1 1' >> %t6
+
+;; Test failed application of path cloning for paths with indirect branches.
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t6 2> %t6.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t6.err
+; RUN: echo 'v1' > %t7
+; RUN: echo 'f bar' >> %t7
+; RUN: echo 'p 1 2' >> %t7
+; RUN: echo 'c 0 1 2.1' >> %t7
+; RUN: llc < %s -mtriple=x86_64-pc-linux -O0 -function-sections -basic-block-sections=%t7 2> %t7.err | FileCheck %s --check-prefix=NOSECTIONS
+; RUN: FileCheck %s --check-prefixes=PATH-INDIR-WARN < %t7.err
+
+
+define void @bar(i1 %a, i1 %b) {
+b0:
+  call void @effect(i32 0)
+  br i1 %a, label %b1, label %b2
+b1:                                              ; preds = %b0
+  call void @effect(i32 1)
+  %0 = select i1 %b,                           ; <ptr> [#uses=1]
+              ptr blockaddress(@bar, %b2),
+              ptr blockaddress(@bar, %b3)
+  indirectbr ptr %0, [label %b2, label %b3]
+b2:                                              ; preds = %b0, %b1
+  call void @effect(i32 2)
+  ret void
+b3:
+  call void @effect(i32 3)                       ; preds = %b1
+  ret void
+}
+
+
+; PATH5:         .section    .text.bar,"ax", at progbits
+; PATH5-LABEL: bar:
+; PATH5:       # %bb.0:        # %b0
+; PATH5:         je .LBB1_2
+; PATH5-NEXT:  # %bb.4:        # %b1
+; PATH5:         jmpq *%rax
+; PATH5-NEXT:  .Ltmp0:         # Block address taken
+; PATH5-NEXT:  .LBB1_2:        # %b2
+; PATH5:         retq
+; PATH5-NEXT:  # %bb.1:        # %b1
+; PATH5:         jmpq *%rax
+; PATH5:         .section    .text.split.bar,"ax", at progbits
+; PATH5:       bar.cold:       # %b3
+; NOSECTIONS-NOT: bar.cold
+
+; PATH-INDIR-WARN: warning: Rejecting the BBSections profile for function bar : block #1 has indirect branch can only appear as the last block of the path, in function bar

``````````

</details>


https://github.com/llvm/llvm-project/pull/68860


More information about the llvm-commits mailing list