[llvm] [CodeGen] Introduce Static Data Splitter pass (PR #122183)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 16:44:03 PST 2025
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/122183
>From 5d207e9f4738b2c49e171a4d9270a294de44ecb4 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 8 Jan 2025 14:28:43 -0800
Subject: [PATCH 1/6] [SDP]Introduce StaticDataSplitter pass and implemenet
jump table splitting
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 +
llvm/include/llvm/CodeGen/MachineFunction.h | 9 +
.../llvm/CodeGen/MachineJumpTableInfo.h | 8 +-
llvm/include/llvm/CodeGen/Passes.h | 4 +
llvm/include/llvm/InitializePasses.h | 1 +
.../llvm/Passes/MachinePassRegistry.def | 1 +
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/CodeGen.cpp | 1 +
llvm/lib/CodeGen/MachineBasicBlock.cpp | 4 +
llvm/lib/CodeGen/MachineFunction.cpp | 13 ++
llvm/lib/CodeGen/StaticDataSplitter.cpp | 153 ++++++++++++++++
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 +
llvm/test/CodeGen/X86/jump-table-partition.ll | 163 ++++++++++++++++++
13 files changed, 363 insertions(+), 2 deletions(-)
create mode 100644 llvm/lib/CodeGen/StaticDataSplitter.cpp
create mode 100644 llvm/test/CodeGen/X86/jump-table-partition.ll
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 7fe33c3913f2dd..9ed356c2ab2331 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -997,6 +997,12 @@ class MachineBasicBlock
/// no changes occurred in the meantime.
bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
+ /// Return an index for MachineJumpTableInfo if \p this basic block ends with
+ /// an indirect jump using a jump table, otherwise -1.
+ /// This function is a thin wrapper and forward calls to the per-target method
+ /// `TargetInstrInfo::getjumpTableIndex`.
+ int getJumpTableIndex() const;
+
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
void push_back(MachineInstr *MI) { Insts.push_back(MI); }
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index d696add8a1af53..c0f983d1c6787b 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -88,6 +88,15 @@ template <> struct ilist_callback_traits<MachineBasicBlock> {
}
};
+// The hotness of static data tracked by a MachineFunction and not represented
+// as a global object in the module IR / MIR. Typical examples are
+// MachineJumpTableInfo and MachineConstantPool.
+enum class DataHotness {
+ Unknown,
+ Cold,
+ Hot,
+};
+
/// MachineFunctionInfo - This class can be derived from and used by targets to
/// hold private target-specific information for each MachineFunction. Objects
/// of type are accessed/created with MF::getInfo and destroyed when the
diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
index e8e9c2f6338e06..cc1f54a81b9bb4 100644
--- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -28,6 +28,7 @@ namespace llvm {
class MachineBasicBlock;
class DataLayout;
class raw_ostream;
+enum class DataHotness;
/// MachineJumpTableEntry - One jump table in the jump table info.
///
@@ -35,8 +36,9 @@ struct MachineJumpTableEntry {
/// MBBs - The vector of basic blocks from which to create the jump table.
std::vector<MachineBasicBlock*> MBBs;
- explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock*> &M)
- : MBBs(M) {}
+ DataHotness Hotness;
+
+ explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock *> &M);
};
class MachineJumpTableInfo {
@@ -107,6 +109,8 @@ class MachineJumpTableInfo {
return JumpTables;
}
+ void updateJumpTableHotness(size_t JTI, DataHotness Hotness);
+
/// RemoveJumpTable - Mark the specific index as being dead. This will
/// prevent it from being emitted.
void RemoveJumpTable(unsigned Idx) {
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index d1fac4a304cffe..16423d03ff7018 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,6 +71,10 @@ namespace llvm {
/// using profile information.
MachineFunctionPass *createMachineFunctionSplitterPass();
+ /// createStaticDataSplitterPass - This pass partions static data sections
+ /// into a hot and cold section using profile information.
+ MachineFunctionPass *createStaticDataSplitterPass();
+
/// MachineFunctionPrinter pass - This pass prints out the machine function to
/// the given stream as a debugging tool.
MachineFunctionPass *
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 1cb9013bc48cc5..8111afcc1fb20f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -293,6 +293,7 @@ void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &);
void initializeSpillPlacementWrapperLegacyPass(PassRegistry &);
void initializeStackColoringLegacyPass(PassRegistry &);
void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &);
+void initializeStaticDataSplitterPass(PassRegistry &);
void initializeStackMapLivenessPass(PassRegistry &);
void initializeStackProtectorPass(PassRegistry &);
void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5a4e79d7225db1..f4cddafa009711 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -236,6 +236,7 @@ DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass)
DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass)
+DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadata)
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 145fd2fac8b564..88f863d8204d09 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -226,6 +226,7 @@ add_llvm_component_library(LLVMCodeGen
StackMaps.cpp
StackProtector.cpp
StackSlotColoring.cpp
+ StaticDataSplitter.cpp
SwiftErrorValueTracking.cpp
SwitchLoweringUtils.cpp
TailDuplication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 8efe540770913a..84d92705de0223 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -130,6 +130,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStackMapLivenessPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
+ initializeStaticDataSplitterPass(Registry);
initializeStripDebugMachineModulePass(Registry);
initializeTailDuplicateLegacyPass(Registry);
initializeTargetPassConfigPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5ac6472a01e9fc..af298c5994fbb6 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1426,6 +1426,10 @@ bool MachineBasicBlock::canSplitCriticalEdge(
return true;
}
+int MachineBasicBlock::getJumpTableIndex() const {
+ return findJumpTableIndex(*this);
+}
+
/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
/// neighboring instructions so the bundle won't be broken by removing MI.
static void unbundleSingleMI(MachineInstr *MI) {
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index e6b9538fe9a02c..b5a89f3bcf42f1 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1291,6 +1291,10 @@ const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
// MachineJumpTableInfo implementation
//===----------------------------------------------------------------------===//
+MachineJumpTableEntry::MachineJumpTableEntry(
+ const std::vector<MachineBasicBlock *> &MBBs)
+ : MBBs(MBBs), Hotness(DataHotness::Unknown) {}
+
/// Return the size of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
// The size of a jump table entry is 4 bytes unless the entry is just the
@@ -1340,6 +1344,15 @@ unsigned MachineJumpTableInfo::createJumpTableIndex(
return JumpTables.size()-1;
}
+void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI,
+ DataHotness Hotness) {
+ assert(JTI < JumpTables.size() && "Invalid JTI!");
+ // Note record the largest hotness is important for mergable data (constant
+ // pools). Even if jump table instances are not merged, record the largest
+ // value seen fwiw.
+ JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness);
+}
+
/// If Old is the target of any jump tables, update the jump tables to branch
/// to New instead.
bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
new file mode 100644
index 00000000000000..14b9c1b3394d2e
--- /dev/null
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -0,0 +1,153 @@
+//===- StaticDataSplitter.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass uses profile information to partition static data sections into
+// hot and cold ones. It begins to split jump tables based on profile, and
+// subsequent patches will handle constant pools and other module internal data.
+//
+// For the original RFC of this pass please see
+// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744.
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "static-data-splitter"
+
+STATISTIC(NumHotJumpTables, "Number of hot jump tables seen");
+STATISTIC(NumColdJumpTables, "Number of cold jump tables seen");
+STATISTIC(NumUnknownJumpTables,
+ "Number of jump tables with unknown hotness. Such jump tables will "
+ "be placed in the hot-suffixed section by default.");
+
+class StaticDataSplitter : public MachineFunctionPass {
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
+ const ProfileSummaryInfo *PSI = nullptr;
+
+ // Returns true iff any jump table is hot-cold categorized.
+ bool splitJumpTables(MachineFunction &MF);
+
+ // Same as above but works on functions with profile information.
+ bool splitJumpTablesWithProfiles(MachineFunction &MF,
+ MachineJumpTableInfo &MJTI);
+
+public:
+ static char ID;
+
+ StaticDataSplitter() : MachineFunctionPass(ID) {
+ initializeStaticDataSplitterPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Static Data Splitter"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+ AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
+ MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+ // Split jump tables based on profile information. Subsequent patches will
+ // handle other data types like constant pools, module-internal data, etc.
+ return splitJumpTables(MF);
+}
+
+bool StaticDataSplitter::splitJumpTablesWithProfiles(
+ MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+ int NumChangedJumpTables = 0;
+ // Regard a jump table as hot by default. If the source and all of destination
+ // blocks are cold, regard the jump table as cold.
+ DataHotness Hotness = DataHotness::Hot;
+ for (const auto &MBB : MF) {
+ // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target
+ // interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements
+ // it so far.
+ const int JTI = MBB.getJumpTableIndex();
+ // This is not a source block of jump table.
+ if (JTI == -1)
+ continue;
+
+ bool AllBlocksCold = true;
+
+ if (!PSI->isColdBlock(&MBB, MBFI))
+ AllBlocksCold = false;
+
+ for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs)
+ if (!PSI->isColdBlock(MBB, MBFI))
+ AllBlocksCold = false;
+
+ if (AllBlocksCold) {
+ Hotness = DataHotness::Cold;
+ ++NumColdJumpTables;
+ } else {
+ ++NumHotJumpTables;
+ }
+
+ MF.getJumpTableInfo()->updateJumpTableHotness(JTI, Hotness);
+ ++NumChangedJumpTables;
+ }
+ return NumChangedJumpTables > 0;
+}
+
+bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ if (!MJTI || MJTI->getJumpTables().empty())
+ return false;
+
+ // Place jump tables according to block hotness if block counters are
+ // available. Check function entry count because BFI depends on it to derive
+ // block counters.
+ if (PSI && PSI->hasProfileSummary() && MBFI &&
+ MF.getFunction().getEntryCount())
+ return splitJumpTablesWithProfiles(MF, *MJTI);
+
+ // Conservatively place all jump tables in the hot-suffixed section if profile
+ // information for the function is not available, or the target doesn't
+ // implement `TargetInstrInfo::getJumpTableIndex` yet.
+ for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++)
+ MF.getJumpTableInfo()->updateJumpTableHotness(JTI, DataHotness::Hot);
+
+ NumUnknownJumpTables += MJTI->getJumpTables().size();
+ return true;
+}
+
+char StaticDataSplitter::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false,
+ false)
+
+MachineFunctionPass *llvm::createStaticDataSplitterPass() {
+ return new StaticDataSplitter();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index d407e9f0871d4c..23929672f11d68 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1256,6 +1256,7 @@ void TargetPassConfig::addMachinePasses() {
"performance.\n";
}
}
+ addPass(createStaticDataSplitterPass());
addPass(createMachineFunctionSplitterPass());
}
// We run the BasicBlockSections pass if either we need BB sections or BB
diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll
new file mode 100644
index 00000000000000..3a8f1395f6b283
--- /dev/null
+++ b/llvm/test/CodeGen/X86/jump-table-partition.ll
@@ -0,0 +1,163 @@
+;; -stats requires asserts
+; requires: asserts
+
+; RUN: llc -stop-after=block-placement %s -o - | llc --run-pass=static-data-splitter -stats -x mir -o - 2>&1 | FileCheck %s --check-prefix=STAT
+
+; `func_with_hot_jumptable` contains a hot jump table and `func_with_cold_jumptable` contains a cold one.
+; `func_without_entry_count` simulates the functions without profile information (e.g., not instrumented or not profiled),
+; it's jump table hotness is unknown and regarded as hot conservatively.
+;
+; Tests stat messages are expected.
+; TODO: Update test to verify section suffixes when target-lowering and assembler changes are implemented.
+;
+; STAT-DAG: 1 static-data-splitter - Number of cold jump tables seen
+; STAT-DAG: 1 static-data-splitter - Number of hot jump tables seen
+; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at .str.2 = private constant [7 x i8] c"case 3\00"
+ at .str.3 = private constant [7 x i8] c"case 4\00"
+ at .str.4 = private constant [7 x i8] c"case 5\00"
+ at str.9 = private constant [7 x i8] c"case 2\00"
+ at str.10 = private constant [7 x i8] c"case 1\00"
+ at str.11 = private constant [8 x i8] c"default\00"
+
+define i32 @func_with_hot_jumptable(i32 %num) !prof !13 {
+entry:
+ switch i32 %num, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb5
+ i32 5, label %sw.bb7
+ ], !prof !14
+
+sw.bb: ; preds = %entry
+ %puts11 = tail call i32 @puts(ptr @str.10)
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ %puts = tail call i32 @puts(ptr @str.9)
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
+ br label %sw.bb5
+
+sw.bb5: ; preds = %entry, %sw.bb3
+ %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
+ br label %sw.bb7
+
+sw.bb7: ; preds = %entry, %sw.bb5
+ %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ %puts12 = tail call i32 @puts(ptr @str.11)
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
+ %div = sdiv i32 %num, 3
+ ret i32 %div
+}
+
+define void @func_with_cold_jumptable(i32 %num) !prof !15 {
+entry:
+ switch i32 %num, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb5
+ i32 5, label %sw.bb7
+ ], !prof !16
+
+sw.bb: ; preds = %entry
+ %puts10 = tail call i32 @puts(ptr @str.10)
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ %puts = tail call i32 @puts(ptr @str.9)
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
+ br label %sw.bb5
+
+sw.bb5: ; preds = %entry, %sw.bb3
+ %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
+ br label %sw.bb7
+
+sw.bb7: ; preds = %entry, %sw.bb5
+ %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ %puts11 = tail call i32 @puts(ptr @str.11)
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
+ ret void
+}
+
+define void @func_without_entry_count(i32 %num) {
+entry:
+ switch i32 %num, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb5
+ i32 5, label %sw.bb7
+ ]
+
+sw.bb: ; preds = %entry
+ %puts10 = tail call i32 @puts(ptr @str.10)
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ %puts = tail call i32 @puts(ptr @str.9)
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
+ br label %sw.bb5
+
+sw.bb5: ; preds = %entry, %sw.bb3
+ %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
+ br label %sw.bb7
+
+sw.bb7: ; preds = %entry, %sw.bb5
+ %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ %puts11 = tail call i32 @puts(ptr @str.11)
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
+ ret void
+}
+
+declare i32 @puts(ptr)
+declare i32 @printf(ptr, ...)
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 230002}
+!4 = !{!"MaxCount", i64 100000}
+!5 = !{!"MaxInternalCount", i64 50000}
+!6 = !{!"MaxFunctionCount", i64 100000}
+!7 = !{!"NumCounts", i64 14}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12}
+!11 = !{i32 990000, i64 10000, i32 7}
+!12 = !{i32 999999, i64 1, i32 9}
+!13 = !{!"function_entry_count", i64 100000}
+!14 = !{!"branch_weights", i32 50000, i32 10000, i32 10000, i32 10000, i32 10000, i32 10000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0}
>From 34b6b9b45564d994844cc9610edddf026a0e49cc Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 9 Jan 2025 11:09:45 -0800
Subject: [PATCH 2/6] Flag-gate the new pass and resolve review feedback
---
llvm/lib/CodeGen/StaticDataSplitter.cpp | 11 ++++++-----
llvm/lib/CodeGen/TargetPassConfig.cpp | 8 +++++++-
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 14b9c1b3394d2e..482a61027cf985 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -83,7 +83,10 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
MachineFunction &MF, MachineJumpTableInfo &MJTI) {
int NumChangedJumpTables = 0;
// Regard a jump table as hot by default. If the source and all of destination
- // blocks are cold, regard the jump table as cold.
+ // blocks are cold, regard the jump table as cold. While a destination block
+ // does not read a jump table (unless it's also a source block), a hot
+ // destination heuristically makes its jump table hot to accommodate for
+ // potential profile data skews (from sampled profiles, for example).
DataHotness Hotness = DataHotness::Hot;
for (const auto &MBB : MF) {
// IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target
@@ -121,11 +124,9 @@ bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
if (!MJTI || MJTI->getJumpTables().empty())
return false;
- // Place jump tables according to block hotness if block counters are
- // available. Check function entry count because BFI depends on it to derive
- // block counters.
+ // Place jump tables according to block hotness if function has profile data.
if (PSI && PSI->hasProfileSummary() && MBFI &&
- MF.getFunction().getEntryCount())
+ MF.getFunction().hasProfileData())
return splitJumpTablesWithProfiles(MF, *MJTI);
// Conservatively place all jump tables in the hot-suffixed section if profile
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 23929672f11d68..6a964c0910fc61 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -263,6 +263,11 @@ static cl::opt<bool>
GCEmptyBlocks("gc-empty-basic-blocks", cl::init(false), cl::Hidden,
cl::desc("Enable garbage-collecting empty basic blocks"));
+static cl::opt<bool>
+ SplitStaticData("split-static-data", cl::Hidden, cl::init(false),
+ cl::desc("Split static data sections into hot and cold "
+ "section ones using profile information"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -1256,8 +1261,9 @@ void TargetPassConfig::addMachinePasses() {
"performance.\n";
}
}
- addPass(createStaticDataSplitterPass());
addPass(createMachineFunctionSplitterPass());
+ if (SplitStaticData)
+ addPass(createStaticDataSplitterPass());
}
// We run the BasicBlockSections pass if either we need BB sections or BB
// address map (or both).
>From 1bacc51a5ae03ca9bda60dda4a63de9944d62950 Mon Sep 17 00:00:00 2001
From: Mingming Liu <mingmingl at google.com>
Date: Fri, 10 Jan 2025 17:11:29 -0800
Subject: [PATCH 3/6] Apply suggestions from code review
Co-authored-by: Ellis Hoag <ellis.sparky.hoag at gmail.com>
---
llvm/lib/CodeGen/MachineFunction.cpp | 2 +-
llvm/lib/CodeGen/StaticDataSplitter.cpp | 5 +----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index b5a89f3bcf42f1..117bd988a40af2 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1347,7 +1347,7 @@ unsigned MachineJumpTableInfo::createJumpTableIndex(
void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI,
DataHotness Hotness) {
assert(JTI < JumpTables.size() && "Invalid JTI!");
- // Note record the largest hotness is important for mergable data (constant
+ // Note: recording the largest hotness is important for mergable data (constant
// pools). Even if jump table instances are not merged, record the largest
// value seen fwiw.
JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness);
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 482a61027cf985..55c4b20b76102c 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -97,10 +97,7 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
if (JTI == -1)
continue;
- bool AllBlocksCold = true;
-
- if (!PSI->isColdBlock(&MBB, MBFI))
- AllBlocksCold = false;
+ bool AllBlocksCold = PSI->isColdBlock(&MBB, MBFI);
for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs)
if (!PSI->isColdBlock(MBB, MBFI))
>From 8a85d1aa36e22619231c5e079f369c499dd67f6a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 10 Jan 2025 17:17:14 -0800
Subject: [PATCH 4/6] resolve review feedback
---
llvm/include/llvm/CodeGen/MachineFunction.h | 2 +-
.../llvm/CodeGen/MachineJumpTableInfo.h | 9 +-
llvm/include/llvm/CodeGen/Passes.h | 2 +-
llvm/lib/CodeGen/MachineFunction.cpp | 14 +-
llvm/lib/CodeGen/StaticDataSplitter.cpp | 88 +++++---
llvm/test/CodeGen/X86/jump-table-partition.ll | 212 ++++++++++--------
6 files changed, 184 insertions(+), 143 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index c0f983d1c6787b..dcdbcaec168d22 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -91,7 +91,7 @@ template <> struct ilist_callback_traits<MachineBasicBlock> {
// The hotness of static data tracked by a MachineFunction and not represented
// as a global object in the module IR / MIR. Typical examples are
// MachineJumpTableInfo and MachineConstantPool.
-enum class DataHotness {
+enum class MachineFunctionDataHotness {
Unknown,
Cold,
Hot,
diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
index cc1f54a81b9bb4..e3675d6489b350 100644
--- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -28,7 +28,7 @@ namespace llvm {
class MachineBasicBlock;
class DataLayout;
class raw_ostream;
-enum class DataHotness;
+enum class MachineFunctionDataHotness;
/// MachineJumpTableEntry - One jump table in the jump table info.
///
@@ -36,7 +36,7 @@ struct MachineJumpTableEntry {
/// MBBs - The vector of basic blocks from which to create the jump table.
std::vector<MachineBasicBlock*> MBBs;
- DataHotness Hotness;
+ MachineFunctionDataHotness Hotness;
explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock *> &M);
};
@@ -109,7 +109,10 @@ class MachineJumpTableInfo {
return JumpTables;
}
- void updateJumpTableHotness(size_t JTI, DataHotness Hotness);
+ // Update machine jump table entry's hotness. Return true if the hotness is
+ // updated.
+ bool updateJumpTableEntryHotness(size_t JTI,
+ MachineFunctionDataHotness Hotness);
/// RemoveJumpTable - Mark the specific index as being dead. This will
/// prevent it from being emitted.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 16423d03ff7018..b5d2a7e6bf035b 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,7 +71,7 @@ namespace llvm {
/// using profile information.
MachineFunctionPass *createMachineFunctionSplitterPass();
- /// createStaticDataSplitterPass - This pass partions static data sections
+ /// createStaticDataSplitterPass - This pass partitions a static data section
/// into a hot and cold section using profile information.
MachineFunctionPass *createStaticDataSplitterPass();
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 117bd988a40af2..d09e93d79aae6c 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1293,7 +1293,7 @@ const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
MachineJumpTableEntry::MachineJumpTableEntry(
const std::vector<MachineBasicBlock *> &MBBs)
- : MBBs(MBBs), Hotness(DataHotness::Unknown) {}
+ : MBBs(MBBs), Hotness(MachineFunctionDataHotness::Unknown) {}
/// Return the size of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
@@ -1344,13 +1344,17 @@ unsigned MachineJumpTableInfo::createJumpTableIndex(
return JumpTables.size()-1;
}
-void MachineJumpTableInfo::updateJumpTableHotness(size_t JTI,
- DataHotness Hotness) {
+bool MachineJumpTableInfo::updateJumpTableEntryHotness(
+ size_t JTI, MachineFunctionDataHotness Hotness) {
assert(JTI < JumpTables.size() && "Invalid JTI!");
- // Note: recording the largest hotness is important for mergable data (constant
+ // Note record the largest hotness is important for mergable data (constant
// pools). Even if jump table instances are not merged, record the largest
// value seen fwiw.
- JumpTables[JTI].Hotness = std::max(JumpTables[JTI].Hotness, Hotness);
+ if (Hotness <= JumpTables[JTI].Hotness)
+ return false;
+
+ JumpTables[JTI].Hotness = Hotness;
+ return true;
}
/// If Old is the target of any jump tables, update the jump tables to branch
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 55c4b20b76102c..82673e851a6817 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -6,13 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass uses profile information to partition static data sections into
-// hot and cold ones. It begins to split jump tables based on profile, and
-// subsequent patches will handle constant pools and other module internal data.
+// The pass uses branch profile data to assign hotness based section qualifiers
+// for the following types of static data:
+// - Jump tables
+// - Constant pools (TODO)
+// - Other module-internal data (TODO)
//
// For the original RFC of this pass please see
-// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744.
+// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MBFIWrapper.h"
@@ -35,8 +38,16 @@ using namespace llvm;
STATISTIC(NumHotJumpTables, "Number of hot jump tables seen");
STATISTIC(NumColdJumpTables, "Number of cold jump tables seen");
STATISTIC(NumUnknownJumpTables,
- "Number of jump tables with unknown hotness. Such jump tables will "
- "be placed in the hot-suffixed section by default.");
+ "Number of jump tables with unknown hotness. Option "
+ "-static-data-default-hotness specifies the hotness.");
+
+static cl::opt<MachineFunctionDataHotness> StaticDataDefaultHotness(
+ "static-data-default-hotness", cl::Hidden,
+ cl::desc("This option specifies the hotness of static data when profile "
+ "information is unavailable"),
+ cl::init(MachineFunctionDataHotness::Hot),
+ cl::values(clEnumValN(MachineFunctionDataHotness::Hot, "hot", "Hot"),
+ clEnumValN(MachineFunctionDataHotness::Cold, "cold", "Cold")));
class StaticDataSplitter : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI = nullptr;
@@ -74,20 +85,13 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- // Split jump tables based on profile information. Subsequent patches will
- // handle other data types like constant pools, module-internal data, etc.
return splitJumpTables(MF);
}
bool StaticDataSplitter::splitJumpTablesWithProfiles(
MachineFunction &MF, MachineJumpTableInfo &MJTI) {
int NumChangedJumpTables = 0;
- // Regard a jump table as hot by default. If the source and all of destination
- // blocks are cold, regard the jump table as cold. While a destination block
- // does not read a jump table (unless it's also a source block), a hot
- // destination heuristically makes its jump table hot to accommodate for
- // potential profile data skews (from sampled profiles, for example).
- DataHotness Hotness = DataHotness::Hot;
+
for (const auto &MBB : MF) {
// IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target
// interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements
@@ -97,21 +101,14 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
if (JTI == -1)
continue;
- bool AllBlocksCold = PSI->isColdBlock(&MBB, MBFI);
+ auto Hotness = MachineFunctionDataHotness::Hot;
- for (const MachineBasicBlock *MBB : MJTI.getJumpTables()[JTI].MBBs)
- if (!PSI->isColdBlock(MBB, MBFI))
- AllBlocksCold = false;
+ // Hotness is based on source basic block hotness.
+ if (PSI->isColdBlock(&MBB, MBFI))
+ Hotness = MachineFunctionDataHotness::Cold;
- if (AllBlocksCold) {
- Hotness = DataHotness::Cold;
- ++NumColdJumpTables;
- } else {
- ++NumHotJumpTables;
- }
-
- MF.getJumpTableInfo()->updateJumpTableHotness(JTI, Hotness);
- ++NumChangedJumpTables;
+ if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness))
+ ++NumChangedJumpTables;
}
return NumChangedJumpTables > 0;
}
@@ -121,18 +118,41 @@ bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
if (!MJTI || MJTI->getJumpTables().empty())
return false;
+ const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+ MF.getFunction().hasProfileData();
+ auto statOnExit = llvm::make_scope_exit([&] {
+ if (!AreStatisticsEnabled())
+ return;
+
+ if (!ProfileAvailable) {
+ NumUnknownJumpTables += MJTI->getJumpTables().size();
+ return;
+ }
+
+ for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
+ auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
+ if (Hotness == MachineFunctionDataHotness::Hot)
+ NumHotJumpTables++;
+ else {
+ assert(Hotness == MachineFunctionDataHotness::Cold &&
+ "A jump table is either hot or cold when profile information is "
+ "available.");
+ NumColdJumpTables++;
+ }
+ }
+ });
+
// Place jump tables according to block hotness if function has profile data.
- if (PSI && PSI->hasProfileSummary() && MBFI &&
- MF.getFunction().hasProfileData())
+ if (ProfileAvailable)
return splitJumpTablesWithProfiles(MF, *MJTI);
- // Conservatively place all jump tables in the hot-suffixed section if profile
- // information for the function is not available, or the target doesn't
- // implement `TargetInstrInfo::getJumpTableIndex` yet.
+ // If function profile is unavailable (e.g., module not instrumented, or new
+ // code paths lacking samples), -static-data-default-hotness specifies the
+ // hotness.
for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++)
- MF.getJumpTableInfo()->updateJumpTableHotness(JTI, DataHotness::Hot);
+ MF.getJumpTableInfo()->updateJumpTableEntryHotness(
+ JTI, StaticDataDefaultHotness);
- NumUnknownJumpTables += MJTI->getJumpTables().size();
return true;
}
diff --git a/llvm/test/CodeGen/X86/jump-table-partition.ll b/llvm/test/CodeGen/X86/jump-table-partition.ll
index 3a8f1395f6b283..1b717451a86e28 100644
--- a/llvm/test/CodeGen/X86/jump-table-partition.ll
+++ b/llvm/test/CodeGen/X86/jump-table-partition.ll
@@ -1,146 +1,160 @@
-;; -stats requires asserts
+; -stats requires asserts
; requires: asserts
-; RUN: llc -stop-after=block-placement %s -o - | llc --run-pass=static-data-splitter -stats -x mir -o - 2>&1 | FileCheck %s --check-prefix=STAT
+; Stop after 'finalize-isel' for simpler MIR, and lower the minimum number of
+; jump table entries so 'switch' needs fewer cases to generate a jump table.
+; RUN: llc -stop-after=finalize-isel -min-jump-table-entries=2 %s -o %t.mir
+; RUN: llc --run-pass=static-data-splitter -stats -x mir %t.mir -o - 2>&1 | FileCheck %s --check-prefix=STAT
-; `func_with_hot_jumptable` contains a hot jump table and `func_with_cold_jumptable` contains a cold one.
-; `func_without_entry_count` simulates the functions without profile information (e.g., not instrumented or not profiled),
-; it's jump table hotness is unknown and regarded as hot conservatively.
-;
; Tests stat messages are expected.
; TODO: Update test to verify section suffixes when target-lowering and assembler changes are implemented.
-;
-; STAT-DAG: 1 static-data-splitter - Number of cold jump tables seen
-; STAT-DAG: 1 static-data-splitter - Number of hot jump tables seen
+; TODO: Also run static-data-splitter pass with -static-data-default-hotness=cold and check data section suffix.
+
+; STAT-DAG: 2 static-data-splitter - Number of cold jump tables seen
+; STAT-DAG: 2 static-data-splitter - Number of hot jump tables seen
; STAT-DAG: 1 static-data-splitter - Number of jump tables with unknown hotness
+; In function @foo, the 2 switch instructions to jt0.* and jt2.* get lowered to hot jump tables,
+; and the 2 switch instructions to jt1.* and jt3.* get lowered to cold jump tables.
+
+; @func_without_profile doesn't have profiles. It's jump table hotness is unknown.
+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
- at .str.2 = private constant [7 x i8] c"case 3\00"
- at .str.3 = private constant [7 x i8] c"case 4\00"
- at .str.4 = private constant [7 x i8] c"case 5\00"
- at str.9 = private constant [7 x i8] c"case 2\00"
- at str.10 = private constant [7 x i8] c"case 1\00"
- at str.11 = private constant [8 x i8] c"default\00"
+ at str.9 = private constant [7 x i8] c".str.9\00"
+ at str.10 = private constant [8 x i8] c".str.10\00"
+ at str.11 = private constant [8 x i8] c".str.11\00"
-define i32 @func_with_hot_jumptable(i32 %num) !prof !13 {
+ at case2 = private constant [7 x i8] c"case 2\00"
+ at case1 = private constant [7 x i8] c"case 1\00"
+ at default = private constant [8 x i8] c"default\00"
+ at jt3 = private constant [4 x i8] c"jt3\00"
+
+define i32 @foo(i32 %num) !prof !13 {
entry:
- switch i32 %num, label %sw.default [
- i32 1, label %sw.bb
- i32 2, label %sw.bb1
- i32 3, label %sw.bb3
- i32 4, label %sw.bb5
- i32 5, label %sw.bb7
+ %mod3 = sdiv i32 %num, 3
+ switch i32 %mod3, label %jt0.default [
+ i32 1, label %jt0.bb1
+ i32 2, label %jt0.bb2
], !prof !14
-sw.bb: ; preds = %entry
- %puts11 = tail call i32 @puts(ptr @str.10)
- br label %sw.epilog
+jt0.bb1:
+ call i32 @puts(ptr @case1)
+ br label %jt0.epilog
-sw.bb1: ; preds = %entry
- %puts = tail call i32 @puts(ptr @str.9)
- br label %sw.epilog
+jt0.bb2:
+ call i32 @puts(ptr @case2)
+ br label %jt0.epilog
-sw.bb3: ; preds = %entry
- %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
- br label %sw.bb5
+jt0.default:
+ call i32 @puts(ptr @default)
+ br label %jt0.epilog
-sw.bb5: ; preds = %entry, %sw.bb3
- %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
- br label %sw.bb7
+jt0.epilog:
+ %zero = icmp eq i32 %num, 0
+ br i1 %zero, label %hot, label %cold, !prof !15
-sw.bb7: ; preds = %entry, %sw.bb5
- %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
- br label %sw.epilog
+hot:
+ %c2 = call i32 @transform(i32 %num)
+ switch i32 %c2, label %jt2.default [
+ i32 1, label %jt2.bb1
+ i32 2, label %jt2.bb2
+ ], !prof !14
-sw.default: ; preds = %entry
- %puts12 = tail call i32 @puts(ptr @str.11)
- br label %sw.epilog
+jt2.bb1:
+ call i32 @puts(ptr @case1)
+ br label %jt1.epilog
-sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
- %div = sdiv i32 %num, 3
- ret i32 %div
-}
+jt2.bb2:
+ call i32 @puts(ptr @case2)
+ br label %jt1.epilog
-define void @func_with_cold_jumptable(i32 %num) !prof !15 {
-entry:
- switch i32 %num, label %sw.default [
- i32 1, label %sw.bb
- i32 2, label %sw.bb1
- i32 3, label %sw.bb3
- i32 4, label %sw.bb5
- i32 5, label %sw.bb7
- ], !prof !16
+jt2.default:
+ call i32 @puts(ptr @default)
+ br label %jt2.epilog
-sw.bb: ; preds = %entry
- %puts10 = tail call i32 @puts(ptr @str.10)
- br label %sw.epilog
+jt2.epilog:
+ %c2cmp = icmp ne i32 %c2, 0
+ br i1 %c2cmp, label %return, label %jt3.prologue, !prof !16
-sw.bb1: ; preds = %entry
- %puts = tail call i32 @puts(ptr @str.9)
- br label %sw.epilog
+cold:
+ %c1 = call i32 @compute(i32 %num)
+ switch i32 %c1, label %jt1.default [
+ i32 1, label %jt1.bb1
+ i32 2, label %jt1.bb2
+ ], !prof !14
-sw.bb3: ; preds = %entry
- %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
- br label %sw.bb5
+jt1.bb1:
+ call i32 @puts(ptr @case1)
+ br label %jt1.epilog
-sw.bb5: ; preds = %entry, %sw.bb3
- %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
- br label %sw.bb7
+jt1.bb2:
+ call i32 @puts(ptr @case2)
+ br label %jt1.epilog
-sw.bb7: ; preds = %entry, %sw.bb5
- %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
- br label %sw.epilog
+jt1.default:
+ call i32 @puts(ptr @default)
+ br label %jt1.epilog
-sw.default: ; preds = %entry
- %puts11 = tail call i32 @puts(ptr @str.11)
- br label %sw.epilog
+jt1.epilog:
+ br label %return
-sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
- ret void
+jt3.prologue:
+ %c3 = call i32 @cleanup(i32 %num)
+ switch i32 %c3, label %jt3.default [
+ i32 1, label %jt3.bb1
+ i32 2, label %jt3.bb2
+ ], !prof !14
+
+jt3.bb1:
+ call i32 @puts(ptr @case1)
+ br label %jt3.epilog
+
+jt3.bb2:
+ call i32 @puts(ptr @case2)
+ br label %jt3.epilog
+
+jt3.default:
+ call i32 @puts(ptr @default)
+ br label %jt3.epilog
+
+jt3.epilog:
+ call i32 @puts(ptr @jt3)
+ br label %return
+
+return:
+ ret i32 %mod3
}
-define void @func_without_entry_count(i32 %num) {
+define void @func_without_profile(i32 %num) {
entry:
switch i32 %num, label %sw.default [
i32 1, label %sw.bb
i32 2, label %sw.bb1
- i32 3, label %sw.bb3
- i32 4, label %sw.bb5
- i32 5, label %sw.bb7
]
-sw.bb: ; preds = %entry
- %puts10 = tail call i32 @puts(ptr @str.10)
+sw.bb:
+ call i32 @puts(ptr @str.10)
br label %sw.epilog
-sw.bb1: ; preds = %entry
- %puts = tail call i32 @puts(ptr @str.9)
- br label %sw.epilog
-
-sw.bb3: ; preds = %entry
- %call4 = tail call i32 (ptr, ...) @printf(ptr @.str.2)
- br label %sw.bb5
-
-sw.bb5: ; preds = %entry, %sw.bb3
- %call6 = tail call i32 (ptr, ...) @printf(ptr @.str.3)
- br label %sw.bb7
-
-sw.bb7: ; preds = %entry, %sw.bb5
- %call8 = tail call i32 (ptr, ...) @printf(ptr @.str.4)
+sw.bb1:
+ call i32 @puts(ptr @str.9)
br label %sw.epilog
-sw.default: ; preds = %entry
- %puts11 = tail call i32 @puts(ptr @str.11)
+sw.default:
+ call i32 @puts(ptr @str.11)
br label %sw.epilog
-sw.epilog: ; preds = %sw.default, %sw.bb7, %sw.bb1, %sw.bb
+sw.epilog:
ret void
}
declare i32 @puts(ptr)
declare i32 @printf(ptr, ...)
+declare i32 @compute(i32)
+declare i32 @transform(i32)
+declare i32 @cleanup(i32)
!llvm.module.flags = !{!0}
@@ -158,6 +172,6 @@ declare i32 @printf(ptr, ...)
!11 = !{i32 990000, i64 10000, i32 7}
!12 = !{i32 999999, i64 1, i32 9}
!13 = !{!"function_entry_count", i64 100000}
-!14 = !{!"branch_weights", i32 50000, i32 10000, i32 10000, i32 10000, i32 10000, i32 10000}
-!15 = !{!"function_entry_count", i64 1}
-!16 = !{!"branch_weights", i32 1, i32 0, i32 0, i32 0, i32 0, i32 0}
+!14 = !{!"branch_weights", i32 60000, i32 20000, i32 20000}
+!15 = !{!"branch_weights", i32 1, i32 99999}
+!16 = !{!"branch_weights", i32 99998, i32 1}
>From e54dacbbbf94538674868b7a8ae1a86dccac44fb Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sun, 12 Jan 2025 22:20:27 -0800
Subject: [PATCH 5/6] Discover jump table by calling MachineOperand::isJTI()
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ---
llvm/lib/CodeGen/MachineBasicBlock.cpp | 4 --
llvm/lib/CodeGen/StaticDataSplitter.cpp | 40 +++++++++++--------
3 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 9ed356c2ab2331..7fe33c3913f2dd 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -997,12 +997,6 @@ class MachineBasicBlock
/// no changes occurred in the meantime.
bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
- /// Return an index for MachineJumpTableInfo if \p this basic block ends with
- /// an indirect jump using a jump table, otherwise -1.
- /// This function is a thin wrapper and forward calls to the per-target method
- /// `TargetInstrInfo::getjumpTableIndex`.
- int getJumpTableIndex() const;
-
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
void push_back(MachineInstr *MI) { Insts.push_back(MI); }
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index af298c5994fbb6..5ac6472a01e9fc 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1426,10 +1426,6 @@ bool MachineBasicBlock::canSplitCriticalEdge(
return true;
}
-int MachineBasicBlock::getJumpTableIndex() const {
- return findJumpTableIndex(*this);
-}
-
/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
/// neighboring instructions so the bundle won't be broken by removing MI.
static void unbundleSingleMI(MachineInstr *MI) {
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 82673e851a6817..2277a15ea6e3e3 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -92,23 +92,31 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
MachineFunction &MF, MachineJumpTableInfo &MJTI) {
int NumChangedJumpTables = 0;
+ // Jump table could be used by either terminating instructions or
+ // non-terminating ones, so we walk all instructions and use
+ // `MachineOperand::isJTI()` to identify jump table operands.
+ // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
+ // in the same loop.
for (const auto &MBB : MF) {
- // IMPORTANT, `getJumpTableIndex` is a thin wrapper around per-target
- // interface `TargetInstrInfo::getjumpTableIndex`, and only X86 implements
- // it so far.
- const int JTI = MBB.getJumpTableIndex();
- // This is not a source block of jump table.
- if (JTI == -1)
- continue;
-
- auto Hotness = MachineFunctionDataHotness::Hot;
-
- // Hotness is based on source basic block hotness.
- if (PSI->isColdBlock(&MBB, MBFI))
- Hotness = MachineFunctionDataHotness::Cold;
-
- if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness))
- ++NumChangedJumpTables;
+ for (const MachineInstr &I : MBB) {
+ for (const MachineOperand &Op : I.operands()) {
+ if (!Op.isJTI())
+ continue;
+ const int JTI = Op.getIndex();
+ // This is not a source block of jump table.
+ if (JTI == -1)
+ continue;
+
+ auto Hotness = MachineFunctionDataHotness::Hot;
+
+ // Hotness is based on source basic block hotness.
+ if (PSI->isColdBlock(&MBB, MBFI))
+ Hotness = MachineFunctionDataHotness::Cold;
+
+ if (MF.getJumpTableInfo()->updateJumpTableEntryHotness(JTI, Hotness))
+ ++NumChangedJumpTables;
+ }
+ }
}
return NumChangedJumpTables > 0;
}
>From 366bbbfc88bc0a0a32e434ff0f4b13ca9444cef3 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 16 Jan 2025 16:40:41 -0800
Subject: [PATCH 6/6] resolve comments
---
llvm/lib/CodeGen/StaticDataSplitter.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 2277a15ea6e3e3..2fd77e99229c60 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -110,6 +110,8 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
auto Hotness = MachineFunctionDataHotness::Hot;
// Hotness is based on source basic block hotness.
+ // TODO: PSI APIs are about instruction hotness. Introduce API for data
+ // access hotness.
if (PSI->isColdBlock(&MBB, MBFI))
Hotness = MachineFunctionDataHotness::Cold;
More information about the llvm-commits
mailing list