[llvm] c8a70f4 - [CodeGen][StaticDataPartitioning]Place local-linkage global variables in hot or unlikely prefixed sections based on profile information (#125756)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 28 16:31:49 PDT 2025


Author: Mingming Liu
Date: 2025-03-28T16:31:46-07:00
New Revision: c8a70f4c6e24076ac3fc18bfb9e2a41ece83e7fc

URL: https://github.com/llvm/llvm-project/commit/c8a70f4c6e24076ac3fc18bfb9e2a41ece83e7fc
DIFF: https://github.com/llvm/llvm-project/commit/c8a70f4c6e24076ac3fc18bfb9e2a41ece83e7fc.diff

LOG: [CodeGen][StaticDataPartitioning]Place local-linkage global variables in hot or unlikely prefixed sections based on profile information (#125756)

In this PR, the static-data-splitter pass finds the local-linkage global
variables in the {`.rodata`, `.data.rel.ro`, `.bss`, `.data`} sections by
analyzing machine instruction operands, and aggregates their accesses
from code across functions.

A follow-up item is to analyze global variable initializers and account
for accesses from data.
* This limitation is demonstrated by `bss2` and `data3` in
`llvm/test/CodeGen/X86/global-variable-partition.ll`.

Some stats of static-data-splitter with this patch:

**section**|**bss**|**rodata**|**data**
:-----:|:-----:|:-----:|:-----:
hot-prefixed section coverage|99.75%|97.71%|91.30%
unlikely-prefixed section size percentage|67.94%|39.37%|63.10%

1. The coverage is defined as `#perf-sample-in-hot-prefixed <data>
section / #perf-sample in <data.*> section` for each <data> section.
* The perf command samples
`MEM_INST_RETIRED.ALL_LOADS:u:pinned:precise=2` events at a high
frequency (`perf -c 2251`) for 30 seconds. The profiled binary is built
as non-PIE so `data.rel.ro` coverage data is not available.
2. The unlikely-prefixed `<data>` section size percentage is defined as
`unlikely <data> section size / the sum size of <data>.* sections` for
each `<data>` section

Added: 
    llvm/include/llvm/Analysis/StaticDataProfileInfo.h
    llvm/lib/Analysis/StaticDataProfileInfo.cpp
    llvm/lib/CodeGen/StaticDataAnnotator.cpp
    llvm/test/CodeGen/X86/global-variable-partition.ll

Modified: 
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/Passes/MachinePassRegistry.def
    llvm/lib/Analysis/CMakeLists.txt
    llvm/lib/CodeGen/CMakeLists.txt
    llvm/lib/CodeGen/CodeGen.cpp
    llvm/lib/CodeGen/StaticDataSplitter.cpp
    llvm/lib/CodeGen/TargetPassConfig.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
new file mode 100644
index 0000000000000..9e2e5fbfc6761
--- /dev/null
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -0,0 +1,75 @@
+#ifndef LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+#define LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A class that holds the constants that represent static data and their
+/// profile information and provides methods to operate on them.
+class StaticDataProfileInfo {
+public:
+  /// Accumulated profile counts of the constants that will be lowered to static
+  /// data sections.
+  DenseMap<const Constant *, uint64_t> ConstantProfileCounts;
+
+  /// Keeps track of the constants that are seen at least once without profile
+  /// counts.
+  DenseSet<const Constant *> ConstantWithoutCounts;
+
+  /// If \p C has a count, return it. Otherwise, return std::nullopt.
+  std::optional<uint64_t> getConstantProfileCount(const Constant *C) const;
+
+public:
+  StaticDataProfileInfo() = default;
+
+  /// If \p Count is not nullopt, add it to the profile count of the constant \p
+  /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if
+  /// the result exceeds it. Otherwise, record that the constant was seen
+  /// without a profile count.
+  void addConstantProfileCount(const Constant *C,
+                               std::optional<uint64_t> Count);
+
+  /// Return a section prefix for the constant \p C based on its profile count.
+  /// - If a constant doesn't have a counter, return an empty string.
+  /// - Otherwise, check the following in order:
+  ///   - If it has a hot count, return "hot".
+  ///   - If it is seen by an unprofiled function, return an empty string.
+  ///   - If it has a cold count, return "unlikely".
+  ///   - Otherwise (e.g. it's used by lukewarm functions), return an empty
+  ///     string.
+  StringRef getConstantSectionPrefix(const Constant *C,
+                                     const ProfileSummaryInfo *PSI) const;
+};
+
+/// This wraps the StaticDataProfileInfo object as an immutable pass, for a
+/// backend pass to operate on.
+class StaticDataProfileInfoWrapperPass : public ImmutablePass {
+public:
+  static char ID;
+  StaticDataProfileInfoWrapperPass();
+  bool doInitialization(Module &M) override; // Allocates Info.
+  bool doFinalization(Module &M) override;   // Releases Info.
+
+  StaticDataProfileInfo &getStaticDataProfileInfo() { return *Info; }
+  const StaticDataProfileInfo &getStaticDataProfileInfo() const {
+    return *Info;
+  }
+
+  /// This pass provides StaticDataProfileInfo for reads/writes but does not
+  /// modify \p M or other analyses. All analyses are preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+private:
+  std::unique_ptr<StaticDataProfileInfo> Info;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_STATICDATAPROFILEINFO_H

diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index e5cb028b25dd9..d214ab9306c2f 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,10 +71,15 @@ namespace llvm {
   /// using profile information.
   MachineFunctionPass *createMachineFunctionSplitterPass();
 
-  /// createStaticDataSplitterPass - This pass partitions a static data section
-  /// into a hot and cold section using profile information.
+  /// createStaticDataSplitterPass - This is a machine-function pass that
+  /// categorizes static data hotness using profile information.
   MachineFunctionPass *createStaticDataSplitterPass();
 
+  /// createStaticDataAnnotatorPass - This is a module pass that reads from
+  /// StaticDataProfileInfoWrapperPass and annotates the section prefix of
+  /// global variables.
+  ModulePass *createStaticDataAnnotatorPass();
+
   /// MachineFunctionPrinter pass - This pass prints out the machine function to
   /// the given stream as a debugging tool.
   MachineFunctionPass *

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index e820277724393..0fb76624eb2c9 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -203,6 +203,8 @@ void initializeMachineLoopInfoWrapperPassPass(PassRegistry &);
 void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
 void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry &);
 void initializeMachineOutlinerPass(PassRegistry &);
+void initializeStaticDataProfileInfoWrapperPassPass(PassRegistry &);
+void initializeStaticDataAnnotatorPass(PassRegistry &);
 void initializeMachinePipelinerPass(PassRegistry &);
 void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializeMachineRegionInfoPassPass(PassRegistry &);

diff  --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 956304560b683..73c4d34faa5a3 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -263,6 +263,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME)
 #endif
 DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass)
+DUMMY_MACHINE_MODULE_PASS("static-data-annotator", StaticDataAnnotator)
 DUMMY_MACHINE_MODULE_PASS("pseudo-probe-inserter", PseudoProbeInserterPass)
 DUMMY_MACHINE_MODULE_PASS("mir-debugify", DebugifyMachineModule)
 DUMMY_MACHINE_MODULE_PASS("mir-check-debugify", CheckDebugMachineModulePass)

diff  --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index 39b9fd0930854..fbf3b587d6bd2 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -127,6 +127,7 @@ add_llvm_component_library(LLVMAnalysis
   ScalarEvolutionAliasAnalysis.cpp
   ScalarEvolutionDivision.cpp
   ScalarEvolutionNormalization.cpp
+  StaticDataProfileInfo.cpp
   StackLifetime.cpp
   StackSafetyAnalysis.cpp
   StructuralHash.cpp

diff  --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
new file mode 100644
index 0000000000000..a435aa00c2e9d
--- /dev/null
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -0,0 +1,72 @@
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include <sys/types.h>
+
+using namespace llvm;
+void StaticDataProfileInfo::addConstantProfileCount(
+    const Constant *C, std::optional<uint64_t> Count) {
+  if (!Count) {
+    ConstantWithoutCounts.insert(C); // Remember C was seen without a count.
+    return;
+  }
+  uint64_t &OriginalCount = ConstantProfileCounts[C];
+  OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount);
+  // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few
+  // large values for special use.
+  if (OriginalCount > getInstrMaxCountValue())
+    OriginalCount = getInstrMaxCountValue();
+}
+
+std::optional<uint64_t>
+StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
+  auto I = ConstantProfileCounts.find(C);
+  if (I == ConstantProfileCounts.end())
+    return std::nullopt; // No count has been recorded for C.
+  return I->second;
+}
+
+StringRef StaticDataProfileInfo::getConstantSectionPrefix(
+    const Constant *C, const ProfileSummaryInfo *PSI) const {
+  auto Count = getConstantProfileCount(C);
+  if (!Count)
+    return ""; // No counter recorded for C, so no prefix.
+  // The accumulated counter shows the constant is hot. Return 'hot' whether
+  // this variable is seen by unprofiled functions or not.
+  if (PSI->isHotCount(*Count))
+    return "hot";
+  // The constant is not hot, and seen by unprofiled functions. We don't want to
+  // assign it to unlikely sections, even if the counter says 'cold'. So return
+  // an empty prefix before checking whether the counter is cold.
+  if (ConstantWithoutCounts.count(C))
+    return "";
+  // The accumulated counter shows the constant is cold. Return 'unlikely'.
+  if (PSI->isColdCount(*Count))
+    return "unlikely";
+  // The counter says lukewarm. Return an empty prefix.
+  return "";
+}
+
+bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
+  Info.reset(new StaticDataProfileInfo()); // Fresh, default-constructed info.
+  return false;
+}
+
+bool StaticDataProfileInfoWrapperPass::doFinalization(Module &M) {
+  Info.reset(); // Destroy the info object; Info becomes null.
+  return false;
+}
+
+INITIALIZE_PASS(StaticDataProfileInfoWrapperPass, "static-data-profile-info",
+                "Static Data Profile Info", false, true)
+
+StaticDataProfileInfoWrapperPass::StaticDataProfileInfoWrapperPass()
+    : ImmutablePass(ID) { // Initialize via the global PassRegistry.
+  initializeStaticDataProfileInfoWrapperPassPass(
+      *PassRegistry::getPassRegistry());
+}
+
+char StaticDataProfileInfoWrapperPass::ID = 0;

diff  --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 0e237ba31a8ca..5dd6413431255 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -228,6 +228,7 @@ add_llvm_component_library(LLVMCodeGen
   StackProtector.cpp
   StackSlotColoring.cpp
   StaticDataSplitter.cpp
+  StaticDataAnnotator.cpp
   SwiftErrorValueTracking.cpp
   SwitchLoweringUtils.cpp
   TailDuplication.cpp

diff  --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 9544151bc7771..771e45ce71595 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -132,6 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeStackProtectorPass(Registry);
   initializeStackSlotColoringLegacyPass(Registry);
   initializeStaticDataSplitterPass(Registry);
+  initializeStaticDataAnnotatorPass(Registry);
   initializeStripDebugMachineModulePass(Registry);
   initializeTailDuplicateLegacyPass(Registry);
   initializeTargetPassConfigPass(Registry);

diff  --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
new file mode 100644
index 0000000000000..edf85aef41c86
--- /dev/null
+++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
@@ -0,0 +1,109 @@
+//===- StaticDataAnnotator - Annotate static data's section prefix --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// To reason about data hotness at module granularity, this file implements a
+// module pass, StaticDataAnnotator, that works in coordination with the
+// StaticDataSplitter pass.
+//
+// The StaticDataSplitter pass is a machine function pass. It analyzes data
+// hotness based on code and adds counters in StaticDataProfileInfo via its
+// wrapper pass StaticDataProfileInfoWrapperPass.
+// The StaticDataProfileInfoWrapperPass sits in the middle between the
+// StaticDataSplitter and StaticDataAnnotator passes.
+// The StaticDataAnnotator pass is a module pass. It iterates global variables
+// in the module, looks up counters from StaticDataProfileInfo and sets the
+// section prefix based on profiles.
+//
+// The three-pass structure is implemented for practical reasons, to work around
+// the limitation that a module pass based on legacy pass manager cannot make
+// use of MachineBlockFrequencyInfo analysis. In the future, we can consider
+// porting the StaticDataSplitter pass to a module-pass using the new pass
+// manager framework. That way, analyses are lazily computed as opposed to
+// eagerly scheduled, and a module pass can use MachineBlockFrequencyInfo.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "static-data-annotator"
+
+using namespace llvm;
+
+/// A module pass which iterates global variables in the module and annotates
+/// their section prefixes based on profile-driven analysis.
+class StaticDataAnnotator : public ModulePass {
+public:
+  static char ID;
+  // Analysis results; both are (re)fetched at the start of runOnModule.
+  StaticDataProfileInfo *SDPI = nullptr;
+  const ProfileSummaryInfo *PSI = nullptr;
+
+  StaticDataAnnotator() : ModulePass(ID) {
+    initializeStaticDataAnnotatorPass(*PassRegistry::getPassRegistry());
+  }
+  // Requires the static-data profile info and the profile summary.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<StaticDataProfileInfoWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.setPreservesAll();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "Static Data Annotator"; }
+
+  bool runOnModule(Module &M) override;
+};
+
+bool StaticDataAnnotator::runOnModule(Module &M) {
+  SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+              .getStaticDataProfileInfo();
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  // Without a profile summary there is nothing to base hotness on.
+  if (!PSI->hasProfileSummary())
+    return false;
+
+  bool Changed = false;
+  for (auto &GV : M.globals()) {
+    if (GV.isDeclarationForLinker())
+      continue;
+
+    // The implementation below assumes prior passes don't set section prefixes,
+    // so it 'assigns' a prefix rather than 'updating' one. Report a fatal error
+    // if a section prefix is already set.
+    if (auto maybeSectionPrefix = GV.getSectionPrefix();
+        maybeSectionPrefix && !maybeSectionPrefix->empty())
+      llvm::report_fatal_error("Global variable " + GV.getName() +
+                               " already has a section prefix " +
+                               *maybeSectionPrefix);
+    // An empty prefix means there is nothing to annotate; leave GV as is.
+    StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI);
+    if (SectionPrefix.empty())
+      continue;
+
+    GV.setSectionPrefix(SectionPrefix);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+char StaticDataAnnotator::ID = 0;
+
+INITIALIZE_PASS(StaticDataAnnotator, DEBUG_TYPE, "Static Data Annotator", false,
+                false)
+
+ModulePass *llvm::createStaticDataAnnotatorPass() {
+  return new StaticDataAnnotator(); // Heap-allocated; returned as ModulePass*.
+}

diff  --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 0965fe85acfc7..60501b4495082 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -9,15 +9,15 @@
 // The pass uses branch profile data to assign hotness based section qualifiers
 // for the following types of static data:
 // - Jump tables
+// - Module-internal global variables
 // - Constant pools (TODO)
-// - Other module-internal data (TODO)
 //
 // For the original RFC of this pass please see
 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
 
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -27,9 +27,12 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
 
@@ -45,15 +48,32 @@ class StaticDataSplitter : public MachineFunctionPass {
   const MachineBranchProbabilityInfo *MBPI = nullptr;
   const MachineBlockFrequencyInfo *MBFI = nullptr;
   const ProfileSummaryInfo *PSI = nullptr;
+  StaticDataProfileInfo *SDPI = nullptr;
 
-  // Update LLVM statistics for a machine function without profiles.
-  void updateStatsWithoutProfiles(const MachineFunction &MF);
-  // Update LLVM statistics for a machine function with profiles.
-  void updateStatsWithProfiles(const MachineFunction &MF);
+  // If the global value is a local linkage global variable, return it.
+  // Otherwise, return nullptr.
+  const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV);
+
+  // Returns true if the global variable is in one of {.rodata, .bss, .data,
+  // .data.rel.ro} sections.
+  bool inStaticDataSection(const GlobalVariable &GV, const TargetMachine &TM);
+
+  // Returns the constant if the operand refers to a global variable or constant
+  // that gets lowered to static data sections. Otherwise, return nullptr.
+  const Constant *getConstant(const MachineOperand &Op,
+                              const TargetMachine &TM);
 
   // Use profiles to partition static data.
   bool partitionStaticDataWithProfiles(MachineFunction &MF);
 
+  // Update LLVM statistics for a machine function with profiles.
+  void updateStatsWithProfiles(const MachineFunction &MF);
+
+  // Update LLVM statistics for a machine function without profiles.
+  void updateStatsWithoutProfiles(const MachineFunction &MF);
+
+  void annotateStaticDataWithoutProfiles(const MachineFunction &MF);
+
 public:
   static char ID;
 
@@ -68,6 +88,9 @@ class StaticDataSplitter : public MachineFunctionPass {
     AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<StaticDataProfileInfoWrapperPass>();
+    // This pass does not modify the CFG.
+    AU.setPreservesCFG();
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -78,10 +101,14 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
+  SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+              .getStaticDataProfileInfo();
+
   const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
                                 MF.getFunction().hasProfileData();
 
   if (!ProfileAvailable) {
+    annotateStaticDataWithoutProfiles(MF);
     updateStatsWithoutProfiles(MF);
     return false;
   }
@@ -92,9 +119,25 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op,
+                                                const TargetMachine &TM) {
+  if (!Op.isGlobal())
+    return nullptr; // Only global-address operands are considered.
+
+  // Only local-linkage global variables are considered; GV is null otherwise.
+  const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
+  // Skip 'llvm.'-prefixed global variables conservatively because they are
+  // often handled specially, and skip those not in static data sections.
+  if (!GV || GV->getName().starts_with("llvm.") ||
+      !inStaticDataSection(*GV, TM))
+    return nullptr;
+  return GV;
+}
+
 bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
+  const TargetMachine &TM = MF.getTarget();
   MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
 
   // Jump table could be used by either terminating instructions or
@@ -105,6 +148,11 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
+        if (!Op.isJTI() && !Op.isGlobal())
+          continue;
+
+        std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+
         if (Op.isJTI()) {
           assert(MJTI != nullptr && "Jump table info is not available.");
           const int JTI = Op.getIndex();
@@ -117,11 +165,13 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
           // Hotness is based on source basic block hotness.
           // TODO: PSI APIs are about instruction hotness. Introduce API for
           // data access hotness.
-          if (PSI->isColdBlock(&MBB, MBFI))
+          if (Count && PSI->isColdCount(*Count))
             Hotness = MachineFunctionDataHotness::Cold;
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
+        } else if (const Constant *C = getConstant(Op, TM)) {
+          SDPI->addConstantProfileCount(C, Count);
         }
       }
     }
@@ -129,6 +179,22 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   return NumChangedJumpTables > 0;
 }
 
+const GlobalVariable *
+StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) {
+  // The LLVM IR Verifier requires that a declaration has valid declaration
+  // linkage, and local linkages are not among the valid ones. So there is no
+  // need to check that GV is not a declaration here.
+  return (GV && GV->hasLocalLinkage()) ? dyn_cast<GlobalVariable>(GV) : nullptr;
+}
+
+bool StaticDataSplitter::inStaticDataSection(const GlobalVariable &GV,
+                                             const TargetMachine &TM) {
+  // Classify GV and accept only data, read-only, relro and BSS kinds.
+  SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(&GV, TM);
+  return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() ||
+         Kind.isBSS();
+}
+
 void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
   if (!AreStatisticsEnabled())
     return;
@@ -147,6 +213,15 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
   }
 }
 
+void StaticDataSplitter::annotateStaticDataWithoutProfiles(
+    const MachineFunction &MF) {
+  for (const auto &MBB : MF)
+    for (const MachineInstr &I : MBB)
+      for (const MachineOperand &Op : I.operands())
+        if (const Constant *C = getConstant(Op, MF.getTarget()))
+          SDPI->addConstantProfileCount(C, std::nullopt); // No count known.
+}
+
 void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {
   if (!AreStatisticsEnabled())
     return;
@@ -163,6 +238,7 @@ INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data",
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StaticDataProfileInfoWrapperPass)
 INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false,
                     false)
 

diff  --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index f788ec5ecb15b..fa1bb84ec5319 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1257,8 +1257,13 @@ void TargetPassConfig::addMachinePasses() {
       }
     }
     addPass(createMachineFunctionSplitterPass());
-    if (SplitStaticData || TM->Options.EnableStaticDataPartitioning)
+    if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) {
+      // The static data splitter pass is a machine function pass, and the
+      // static data annotator pass is a module-wide pass. See the file comment
+      // in StaticDataAnnotator.cpp for the motivation.
       addPass(createStaticDataSplitterPass());
+      addPass(createStaticDataAnnotatorPass());
+    }
   }
   // We run the BasicBlockSections pass if either we need BB sections or BB
   // address map (or both).

diff  --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
new file mode 100644
index 0000000000000..91084d038cfe0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -0,0 +1,229 @@
+; The static-data-splitter processes data from @cold_func first,
+; @unprofiled_func secondly, and @hot_func after the two functions above.
+; Tests that data hotness is based on aggregated module-wide profile
+; information. This way linker-mergeable data is emitted once per module.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The three RUN commands set `-relocation-model=pic` so `hot_relro_array` and
+; `cold_relro_array` are placed in the .data.rel.ro-prefixed section.
+
+; This RUN command sets `-data-sections=true -unique-section-names=true` so data
+; sections are uniquified by variable names.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=true -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,COMMON --dump-input=always
+
+; This RUN command sets `-data-sections=true -unique-section-names=false` so
+; data sections are uniquified by numbers.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,COMMON --dump-input=always
+
+; This RUN command sets `-data-sections=false -unique-section-names=false`.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=false \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,COMMON --dump-input=always
+
+; For @.str and @.str.1
+; COMMON:      .type .L.str,@object
+; SYM-NEXT:    .section .rodata.str1.1.hot.
+; UNIQ-NEXT:   .section	.rodata.str1.1.hot.,"aMS",@progbits,1
+; AGG-NEXT:    .section	.rodata.str1.1.hot
+; COMMON-NEXT: .L.str:
+; COMMON-NEXT:    "hot\t"
+; COMMON:      .L.str.1:
+; COMMON-NEXT:    "%d\t%d\t%d\n"
+
+; For @hot_relro_array
+; COMMON:      .type hot_relro_array,@object
+; SYM-NEXT:    .section	.data.rel.ro.hot.hot_relro_array
+; UNIQ-NEXT:   .section	.data.rel.ro.hot.,"aw",@progbits,unique,3
+; AGG-NEXT:    .section	.data.rel.ro.hot.,"aw",@progbits
+
+; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}.
+; COMMON:      .type hot_data,@object
+; SYM-NEXT:    .section	.data.hot.hot_data,"aw",@progbits
+; UNIQ-NEXT:   .section	.data.hot.,"aw",@progbits,unique,4
+; AGG-NEXT:    .section	.data.hot.,"aw",@progbits
+
+; For @hot_bss, which is accessed by {unprofiled_func, hot_func}.
+; COMMON:      .type hot_bss,@object
+; SYM-NEXT:    .section	.bss.hot.hot_bss,"aw",@nobits
+; UNIQ-NEXT:   .section	.bss.hot.,"aw",@nobits,unique,5
+; AGG-NEXT:    .section .bss.hot.,"aw",@nobits
+
+; For @.str.2
+; COMMON:      .type .L.str.2,@object
+; SYM-NEXT:    .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; UNIQ-NEXT:   .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; AGG-NEXT:    .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; COMMON-NEXT: .L.str.2:
+; COMMON-NEXT:    "cold%d\t%d\t%d\n"
+
+; For @cold_bss
+; COMMON:      .type cold_bss,@object
+; SYM-NEXT:    .section	.bss.unlikely.cold_bss,"aw",@nobits
+; UNIQ-NEXT:   .section	.bss.unlikely.,"aw",@nobits,unique,6
+; AGG-NEXT:    .section	.bss.unlikely.,"aw",@nobits
+
+; For @cold_data
+; COMMON:      .type cold_data,@object
+; SYM-NEXT:    .section	.data.unlikely.cold_data,"aw",@progbits
+; UNIQ-NEXT:   .section	.data.unlikely.,"aw",@progbits,unique,7
+; AGG-NEXT:    .section	.data.unlikely.,"aw",@progbits
+
+; For @cold_data_custom_foo_section
+; It has an explicit section 'foo' and shouldn't have hot or unlikely suffix.
+; COMMON:      .type cold_data_custom_foo_section,@object
+; SYM-NEXT:    .section foo,"aw",@progbits
+; UNIQ-NEXT:   .section foo,"aw",@progbits
+; AGG-NEXT:    .section foo,"aw",@progbits
+
+; For @cold_relro_array
+; COMMON:      .type cold_relro_array,@object
+; SYM-NEXT:    .section	.data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
+; UNIQ-NEXT:   .section	.data.rel.ro.unlikely.,"aw",@progbits,unique,8
+; AGG-NEXT:    .section	.data.rel.ro.unlikely.,"aw",@progbits
+
+; Currently static-data-splitter only analyzes access from code.
+; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; and @cold_relro_array. A follow-up item is to analyze indirect access via data
+; and prune the unlikely list.
+; For @bss2
+; COMMON:      .type bss2,@object
+; SYM-NEXT:    .section	.bss.unlikely.bss2,"aw",@nobits
+; UNIQ-NEXT:   .section	.bss.unlikely.,"aw",@nobits,unique,9
+; AGG-NEXT:    .section	.bss.unlikely.,"aw",@nobits
+
+; For @data3
+; COMMON:      .type data3,@object
+; SYM-NEXT:    .section	.data.unlikely.data3,"aw",@progbits
+; UNIQ-NEXT:   .section	.data.unlikely.,"aw",@progbits,unique,10
+; AGG-NEXT:    .section	.data.unlikely.,"aw",@progbits
+
+; For @data_with_unknown_hotness
+; SYM: 	       .type	.Ldata_with_unknown_hotness,@object          # @data_with_unknown_hotness
+; SYM:         .section .data..Ldata_with_unknown_hotness,"aw",@progbits
+; UNIQ:        .section  .data,"aw",@progbits,unique,11
+; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+; AGG:         .data
+; COMMON:      .Ldata_with_unknown_hotness:
+
+; For @hot_data_custom_bar_section
+; It has an explicit section attribute 'bar' and shouldn't have hot or unlikely suffix.
+; COMMON:      .type hot_data_custom_bar_section,@object
+; SYM-NEXT:    .section bar,"aw",@progbits
+; SYM:         hot_data_custom_bar_section
+; UNIQ:        .section bar,"aw",@progbits
+; AGG:         .section bar,"aw",@progbits
+
+@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
+@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
+@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
+@hot_data = internal global i32 5
+@hot_bss = internal global i32 0
+@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
+@cold_bss = internal global i32 0
+@cold_data = internal global i32 4
+@cold_data_custom_foo_section = internal global i32 100, section "foo"
+@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
+@bss2 = internal global i32 0
+@data3 = internal global i32 3
+@data_with_unknown_hotness = private global i32 5
+@hot_data_custom_bar_section = internal global i32 101 #0
+
+define void @cold_func(i32 %0) !prof !15 {
+  %2 = load i32, ptr @cold_bss
+  %3 = load i32, ptr @cold_data
+  %4 = srem i32 %0, 2
+  %5 = sext i32 %4 to i64
+  %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
+  %7 = load ptr, ptr %6
+  %8 = load i32, ptr %7
+  %9 = load i32, ptr @data_with_unknown_hotness
+  %11 = load i32, ptr @hot_data
+  %12 = load i32, ptr @cold_data_custom_foo_section
+  %13 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11, i32 %12)
+  ret void
+}
+
+define i32 @unprofiled_func() {
+  %a = load i32, ptr @data_with_unknown_hotness
+  %b = load i32, ptr @hot_data
+  %c = load i32, ptr @hot_bss
+  %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c)
+  ret i32 %ret
+}
+
+define void @hot_func(i32 %0) !prof !14 {
+  %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str)
+  %3 = srem i32 %0, 2
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+  %6 = load ptr, ptr %5
+  %7 = load i32, ptr %6
+  %8 = load i32, ptr @hot_data
+  %9 = load i32, ptr @hot_bss
+  %10 = load i32, ptr @hot_data_custom_bar_section
+  %11 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9, i32 %10)
+  ret void
+}
+
+define i32 @main(i32 %0, ptr %1) !prof !15 {
+  br label %11
+
+5:                                                ; preds = %11
+  %6 = call i32 @rand()
+  store i32 %6, ptr @cold_bss
+  store i32 %6, ptr @cold_data
+  store i32 %6, ptr @bss2
+  store i32 %6, ptr @data3
+  call void @cold_func(i32 %6)
+  ret i32 0
+
+11:                                               ; preds = %11, %2
+  %12 = phi i32 [ 0, %2 ], [ %19, %11 ]
+  %13 = call i32 @rand()
+  %14 = srem i32 %13, 2
+  %15 = sext i32 %14 to i64
+  %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15
+  %17 = load ptr, ptr %16
+  store i32 %13, ptr %17
+  store i32 %13, ptr @hot_data
+  %18 = add i32 %13, 1
+  store i32 %18, ptr @hot_bss
+  call void @hot_func(i32 %12)
+  %19 = add i32 %12, 1
+  %20 = icmp eq i32 %19, 100000
+  br i1 %20, label %5, label %11, !prof !16
+}
+
+declare i32 @rand()
+declare i32 @func_taking_arbitrary_param(...)
+
+attributes #0 = {"data-section"="bar"}
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460183}
+!5 = !{!"MaxCount", i64 849024}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849024}
+!8 = !{!"NumCounts", i64 23627}
+!9 = !{!"NumFunctions", i64 3271}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 166, i32 73}
+!13 = !{i32 999999, i64 3, i32 1443}
+!14 = !{!"function_entry_count", i64 100000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 99999}


        


More information about the llvm-commits mailing list