[llvm] [CodeGen][StaticDataPartitioning]Place local-linkage global variables in hot or unlikely prefixed sections based on profile information (PR #125756)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 7 17:45:18 PST 2025


https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/125756

>From 8eea1ea0109f07d590d189bc17d5f5411556d77a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 11:19:44 -0800
Subject: [PATCH 1/4] [CodeGen][StaticDataPartitioning]Place module-internal
 global variables based on profile information

---
 llvm/include/llvm/IR/Function.h               |   6 -
 llvm/include/llvm/IR/GlobalObject.h           |  11 +
 llvm/include/llvm/IR/MDBuilder.h              |   4 +-
 llvm/lib/CodeGen/StaticDataSplitter.cpp       | 201 +++++++++++++-----
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |   6 +
 llvm/lib/IR/Function.cpp                      |  16 --
 llvm/lib/IR/Globals.cpp                       |  30 +++
 llvm/lib/IR/MDBuilder.cpp                     |   6 +-
 llvm/test/CodeGen/X86/data-section-prefix.ll  |  27 +++
 .../CodeGen/X86/global-variable-partition.ll  | 159 ++++++++++++++
 .../CodeGenPrepare/X86/section-samplepgo.ll   |   4 +-
 .../Transforms/CodeGenPrepare/X86/section.ll  |   4 +-
 .../Transforms/HotColdSplit/coldentrycount.ll |   4 +-
 .../section-accurate-samplepgo.ll             |   6 +-
 14 files changed, 396 insertions(+), 88 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/data-section-prefix.ll
 create mode 100644 llvm/test/CodeGen/X86/global-variable-partition.ll

diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fcd5396ccfdbc87..29041688124bc29 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -346,12 +346,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
   /// sample PGO, to enable the same inlines as the profiled optimized binary.
   DenseSet<GlobalValue::GUID> getImportGUIDs() const;
 
-  /// Set the section prefix for this function.
-  void setSectionPrefix(StringRef Prefix);
-
-  /// Get the section prefix for this function.
-  std::optional<StringRef> getSectionPrefix() const;
-
   /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
   ///                             to use during code generation.
   bool hasGC() const {
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 08edc13d81f880a..bb50c39813e1407 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -124,6 +124,17 @@ class GlobalObject : public GlobalValue {
   /// appropriate default object file section.
   void setSection(StringRef S);
 
+  /// Set the section prefix for this global object.
+  void setSectionPrefix(StringRef Prefix);
+
+  /// Update the section prefix, unless the existing prefix is the same as
+  /// `KeepPrefix`.
+  void updateSectionPrefix(StringRef Prefix,
+                           std::optional<StringRef> KeepPrefix = std::nullopt);
+
+  /// Get the section prefix for this global object.
+  std::optional<StringRef> getSectionPrefix() const;
+
   bool hasComdat() const { return getComdat() != nullptr; }
   const Comdat *getComdat() const { return ObjComdat; }
   Comdat *getComdat() { return ObjComdat; }
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index e02ec8f5a3d8bb1..ce4e1da656049d1 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -89,8 +89,8 @@ class MDBuilder {
   MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
                                    const DenseSet<GlobalValue::GUID> *Imports);
 
-  /// Return metadata containing the section prefix for a function.
-  MDNode *createFunctionSectionPrefix(StringRef Prefix);
+  /// Return metadata containing the section prefix for a global object.
+  MDNode *createGlobalObjectSectionPrefix(StringRef Prefix);
 
   /// Return metadata containing the pseudo probe descriptor for a function.
   MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName);
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e5bf0a5a3a255f6..f09e3b41e0723e6 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -9,13 +9,13 @@
 // The pass uses branch profile data to assign hotness based section qualifiers
 // for the following types of static data:
 // - Jump tables
+// - Module-internal global variables
 // - Constant pools (TODO)
-// - Other module-internal data (TODO)
 //
 // For the original RFC of this pass please see
 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
 
-#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
@@ -27,9 +27,12 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
 
@@ -46,12 +49,27 @@ class StaticDataSplitter : public MachineFunctionPass {
   const MachineBlockFrequencyInfo *MBFI = nullptr;
   const ProfileSummaryInfo *PSI = nullptr;
 
-  // Returns true iff any jump table is hot-cold categorized.
-  bool splitJumpTables(MachineFunction &MF);
+  void updateStats(bool ProfileAvailable, const MachineJumpTableInfo *MJTI);
+  void updateJumpTableStats(bool ProfileAvailable,
+                            const MachineJumpTableInfo &MJTI);
 
-  // Same as above but works on functions with profile information.
-  bool splitJumpTablesWithProfiles(const MachineFunction &MF,
-                                   MachineJumpTableInfo &MJTI);
+  // Use profiles to partition static data.
+  bool partitionStaticDataWithProfiles(MachineFunction &MF);
+
+  // If the global value is a local linkage global variable, return it.
+  // Otherwise, return nullptr.
+  const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV);
+
+  // Returns true if the global variable is in one of {.rodata, .bss, .data,
+  // .data.rel.ro} sections
+  bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
+
+  // Iterate all global variables in the module and update the section prefix
+  // of the module-internal data.
+  void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+
+  // Accumulated data profile count across machine functions in the module.
+  DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
 
 public:
   static char ID;
@@ -77,13 +95,24 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
-  return splitJumpTables(MF);
+  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+                                MF.getFunction().hasProfileData();
+  bool Changed = false;
+
+  if (ProfileAvailable)
+    Changed |= partitionStaticDataWithProfiles(MF);
+
+  updateGlobalVariableSectionPrefix(MF);
+  updateStats(ProfileAvailable, MF.getJumpTableInfo());
+  return Changed;
 }
 
-bool StaticDataSplitter::splitJumpTablesWithProfiles(
-    const MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
+  const TargetMachine &TM = MF.getTarget();
+  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
   // `MachineOperand::isJTI()` to identify jump table operands.
@@ -92,63 +121,131 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isJTI())
-          continue;
-        const int JTI = Op.getIndex();
-        // This is not a source block of jump table.
-        if (JTI == -1)
+        std::optional<uint64_t> Count = std::nullopt;
+        if (!Op.isJTI() && !Op.isGlobal())
           continue;
 
-        auto Hotness = MachineFunctionDataHotness::Hot;
+        Count = MBFI->getBlockProfileCount(&MBB);
+
+        if (Op.isJTI()) {
+          assert(MJTI != nullptr && "Jump table info is not available.");
+          const int JTI = Op.getIndex();
+          // This is not a source block of jump table.
+          if (JTI == -1)
+            continue;
+
+          auto Hotness = MachineFunctionDataHotness::Hot;
+
+          // Hotness is based on source basic block hotness.
+          // TODO: PSI APIs are about instruction hotness. Introduce API for
+          // data access hotness.
+          if (Count && PSI->isColdCount(*Count))
+            Hotness = MachineFunctionDataHotness::Cold;
 
-        // Hotness is based on source basic block hotness.
-        // TODO: PSI APIs are about instruction hotness. Introduce API for data
-        // access hotness.
-        if (PSI->isColdBlock(&MBB, MBFI))
-          Hotness = MachineFunctionDataHotness::Cold;
+          if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
+            ++NumChangedJumpTables;
+        } else if (Op.isGlobal()) {
+          // Find global variables with local linkage
+          const GlobalVariable *GV =
+              getLocalLinkageGlobalVariable(Op.getGlobal());
+          if (!GV || !inStaticDataSection(GV, TM))
+            continue;
 
-        if (MJTI.updateJumpTableEntryHotness(JTI, Hotness))
-          ++NumChangedJumpTables;
+          // Accumulate data profile count across machine function
+          // instructions.
+          // TODO: Analyze global variable's initializers.
+          if (Count) {
+            auto [It, Inserted] =
+                DataProfileCounts.try_emplace(GV, APInt(128, 0));
+            It->second += *Count;
+          }
+        }
       }
     }
   }
   return NumChangedJumpTables > 0;
 }
 
-bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
-  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  if (!MJTI || MJTI->getJumpTables().empty())
-    return false;
-
-  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
-                                MF.getFunction().hasProfileData();
-  auto statOnExit = llvm::make_scope_exit([&] {
-    if (!AreStatisticsEnabled())
-      return;
+void StaticDataSplitter::updateJumpTableStats(
+    bool ProfileAvailable, const MachineJumpTableInfo &MJTI) {
+  if (!ProfileAvailable) {
+    NumUnknownJumpTables += MJTI.getJumpTables().size();
+    return;
+  }
 
-    if (!ProfileAvailable) {
-      NumUnknownJumpTables += MJTI->getJumpTables().size();
-      return;
+  for (size_t JTI = 0; JTI < MJTI.getJumpTables().size(); JTI++) {
+    auto Hotness = MJTI.getJumpTables()[JTI].Hotness;
+    if (Hotness == MachineFunctionDataHotness::Hot) {
+      ++NumHotJumpTables;
+    } else {
+      assert(Hotness == MachineFunctionDataHotness::Cold &&
+             "A jump table is either hot or cold when profile information is "
+             "available.");
+      ++NumColdJumpTables;
     }
+  }
+}
 
-    for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
-      auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
-      if (Hotness == MachineFunctionDataHotness::Hot) {
-        ++NumHotJumpTables;
-      } else {
-        assert(Hotness == MachineFunctionDataHotness::Cold &&
-               "A jump table is either hot or cold when profile information is "
-               "available.");
-        ++NumColdJumpTables;
-      }
-    }
-  });
+void StaticDataSplitter::updateStats(bool ProfileAvailable,
+                                     const MachineJumpTableInfo *MJTI) {
+  if (!AreStatisticsEnabled())
+    return;
 
-  // Place jump tables according to block hotness if function has profile data.
-  if (ProfileAvailable)
-    return splitJumpTablesWithProfiles(MF, *MJTI);
+  if (MJTI)
+    updateJumpTableStats(ProfileAvailable, *MJTI);
+}
 
-  return true;
+const GlobalVariable *
+StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) {
+  if (!GV || GV->isDeclarationForLinker())
+    return nullptr;
+
+  return GV->hasLocalLinkage() ? dyn_cast<GlobalVariable>(GV) : nullptr;
+}
+
+bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
+                                             const TargetMachine &TM) {
+  assert(GV && "Caller guaranteed");
+
+  // Skip LLVM reserved symbols.
+  if (GV->getName().starts_with("llvm."))
+    return false;
+
+  SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+  return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() ||
+         Kind.isBSS();
+}
+
+void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+    MachineFunction &MF) {
+  for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
+    if (GV.isDeclarationForLinker())
+      continue;
+    // DataProfileCounts accumulates data profile count across all machine
+    // function instructions, and it can't model the indirect accesses through
+    // other global variables' initializers.
+    // TODO: Analyze the users of module-internal global variables and see
+    // through the users' initializers. Do not place a global variable into
+    // unlikely section if any of its users are potentially hot.
+    auto Iter = DataProfileCounts.find(&GV);
+    if (Iter == DataProfileCounts.end())
+      continue;
+
+    // StaticDataSplitter is made a machine function pass rather than a module
+    // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function
+    // analysis pass and cannot be used for a legacy module pass.
+    // As a result, we use `DataProfileCounts` to accumulate data
+    // profile count across machine functions and update global variable section
+    // prefix once per machine function.
+    // FIXME: Make StaticDataSplitter a module pass under new pass manager
+    // framework, and set global variable section prefix once per module after
+    // analyzing all machine functions.
+    if (PSI->isColdCount(Iter->second.getZExtValue())) {
+      GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+    } else if (PSI->isHotCount(Iter->second.getZExtValue())) {
+      GV.updateSectionPrefix("hot");
+    }
+  }
 }
 
 char StaticDataSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3c2c7c8c9fed69a..d20ab29cc197974 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -670,6 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
   }
 
   bool HasPrefix = false;
+
   if (const auto *F = dyn_cast<Function>(GO)) {
     // Jump table hotness takes precedence over its enclosing function's hotness
     // if it's known. The function's section prefix is used if jump table entry
@@ -687,6 +688,11 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
       raw_svector_ostream(Name) << '.' << *Prefix;
       HasPrefix = true;
     }
+  } else if (const auto *GV = dyn_cast<GlobalVariable>(GO)) {
+    if (std::optional<StringRef> Prefix = GV->getSectionPrefix()) {
+      raw_svector_ostream(Name) << '.' << *Prefix;
+      HasPrefix = true;
+    }
   }
 
   if (UniqueSectionName) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index e6f0d64d071ba67..5666f0a53866fda 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1164,22 +1164,6 @@ DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
   return R;
 }
 
-void Function::setSectionPrefix(StringRef Prefix) {
-  MDBuilder MDB(getContext());
-  setMetadata(LLVMContext::MD_section_prefix,
-              MDB.createFunctionSectionPrefix(Prefix));
-}
-
-std::optional<StringRef> Function::getSectionPrefix() const {
-  if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
-    assert(cast<MDString>(MD->getOperand(0))->getString() ==
-               "function_section_prefix" &&
-           "Metadata not match");
-    return cast<MDString>(MD->getOperand(1))->getString();
-  }
-  return std::nullopt;
-}
-
 bool Function::nullPointerIsDefined() const {
   return hasFnAttribute(Attribute::NullPointerIsValid);
 }
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index db5e1cb57b1bab8..884089262e4659d 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -286,6 +287,35 @@ void GlobalObject::setSection(StringRef S) {
   setGlobalObjectFlag(HasSectionHashEntryBit, !S.empty());
 }
 
+void GlobalObject::setSectionPrefix(StringRef Prefix) {
+  MDBuilder MDB(getContext());
+  setMetadata(LLVMContext::MD_section_prefix,
+              MDB.createGlobalObjectSectionPrefix(Prefix));
+}
+
+void GlobalObject::updateSectionPrefix(StringRef Prefix,
+                                       std::optional<StringRef> KeepPrefix) {
+  auto SectionPrefix = getSectionPrefix();
+  if (SectionPrefix && (*SectionPrefix == Prefix ||
+                        (KeepPrefix && *SectionPrefix == *KeepPrefix)))
+    return;
+
+  setSectionPrefix(Prefix);
+  return;
+}
+
+std::optional<StringRef> GlobalObject::getSectionPrefix() const {
+  if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
+    [[maybe_unused]] StringRef MDName =
+        cast<MDString>(MD->getOperand(0))->getString();
+    assert((MDName == "section_prefix" ||
+            (isa<Function>(this) && MDName == "function_section_prefix")) &&
+           "Metadata not match");
+    return cast<MDString>(MD->getOperand(1))->getString();
+  }
+  return std::nullopt;
+}
+
 bool GlobalValue::isNobuiltinFnDef() const {
   const Function *F = dyn_cast<Function>(this);
   if (!F || F->empty())
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 26c8ab9fc36c850..b6aa8844a7eafa7 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -87,9 +87,9 @@ MDNode *MDBuilder::createFunctionEntryCount(
   return MDNode::get(Context, Ops);
 }
 
-MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
-  return MDNode::get(
-      Context, {createString("function_section_prefix"), createString(Prefix)});
+MDNode *MDBuilder::createGlobalObjectSectionPrefix(StringRef Prefix) {
+  return MDNode::get(Context,
+                     {createString("section_prefix"), createString(Prefix)});
 }
 
 MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
diff --git a/llvm/test/CodeGen/X86/data-section-prefix.ll b/llvm/test/CodeGen/X86/data-section-prefix.ll
new file mode 100644
index 000000000000000..4812fc70758fbce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/data-section-prefix.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-linux-gnu -data-sections %s -o - | FileCheck %s --check-prefix=ELF
+; RUN: llc -mtriple x86_64-linux-gnu -unique-section-names=0 -data-sections %s -o - | FileCheck %s --check-prefix=ELF-NOUNIQ
+
+; RUN: llc -mtriple x86_64-windows-msvc -data-sections %s -o - | FileCheck %s --check-prefix=COFF-MSVC
+
+; ELF: .section .data.hot.foo,
+; ELF: .section .data.bar,
+; ELF: .section .bss.unlikely.baz,
+; ELF: .section .bss.quz,
+
+; ELF-NOUNIQ: .section    .data.hot.,"aw",@progbits,unique,1
+; ELF-NOUNIQ: .section    .data,"aw",@progbits,unique,2
+; ELF-NOUNIQ: .section    .bss.unlikely.,"aw",@nobits,unique,3
+; ELF-NOUNIQ: .section    .bss,"aw",@nobits,unique,4
+
+; COFF-MSVC: .section .data,"dw",one_only,foo
+; COFF-MSVC: .section .data,"dw",one_only,bar
+; COFF-MSVC: .section .bss,"bw",one_only,baz
+; COFF-MSVC: .section .bss,"bw",one_only,quz
+
+@foo = global i32 1, !section_prefix !0
+@bar = global i32 2
+@baz = global i32 0, !section_prefix !1
+@quz = global i32 0
+
+!0 = !{!"section_prefix", !"hot"}
+!1 = !{!"section_prefix", !"unlikely"}
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
new file mode 100644
index 000000000000000..d457d766999e246
--- /dev/null
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -0,0 +1,159 @@
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=true -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=false \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA
+
+; SYM: .section .rodata.str1.1.hot.
+; UNIQ: .section	.rodata.str1.1.hot.,"aMS",@progbits,1
+; AGG: .section	.rodata.str1.1.hot
+; DATA: .L.str
+; DATA:    "hot\t"
+; DATA: .L.str.1
+; DATA:    "%d\t%d\t%d\n"
+
+
+; SYM:  .section	.data.rel.ro.hot.hot_relro_array
+; SYM: .section	.data.hot.hot_data,"aw",@progbits
+; SYM: .section	.bss.hot.hot_bss,"aw",@nobits
+
+; UNIQ: .section	.data.rel.ro.hot.,"aw",@progbits,unique,3
+; UNIQ: .section	.data.hot.,"aw",@progbits,unique,4
+; UNIQ: .section	.bss.hot.,"aw",@nobits,unique,5
+
+; AGG: .section	.data.rel.ro.hot.,"aw",@progbits
+; AGG: .section	.data.hot.,"aw",@progbits
+; AGG: .section .bss.hot.,"aw",@nobits
+
+
+; SYM: .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; UNIQ: section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; AGG: .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; DATA: .L.str.2:
+; DATA:    "cold%d\t%d\t%d\n"
+
+
+; SYM: .section	.bss.unlikely.cold_bss,"aw",@nobits
+; SYM: .section	.data.unlikely.cold_data,"aw",@progbits
+; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
+; SYM: .section	.bss.unlikely._ZL4bss2,"aw",@nobits
+; SYM: .section	.data.unlikely._ZL5data3,"aw",@progbits
+
+; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,6
+; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,7
+; UNIQ: .section	.data.rel.ro.unlikely.,"aw",@progbits,unique,8
+; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,9
+; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,10
+
+; AGG: .section	.bss.unlikely.,"aw",@nobits
+; AGG: .section	.data.unlikely.,"aw",@progbits
+; AGG: .section	.data.rel.ro.unlikely.,"aw",@progbits
+; AGG: .section	.bss.unlikely.,"aw",@nobits
+; AGG: .section	.data.unlikely.,"aw",@progbits
+
+@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
+@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
+@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_data = internal global i32 5
+@hot_bss = internal global i32 0
+@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
+@cold_bss = internal global i32 0
+@cold_data = internal global i32 4
+@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
+@_ZL4bss2 = internal global i32 0
+@_ZL5data3 = internal global i32 3
+
+define void @hot_callee(i32 %0) !prof !51 {
+  %2 = call i32 (ptr, ...) @printf(ptr @.str)
+  %3 = srem i32 %0, 2
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+  %6 = load ptr, ptr %5
+  %7 = load i32, ptr %6
+  %8 = load i32, ptr @hot_data
+  %9 = load i32, ptr @hot_bss
+  %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9)
+  ret void
+}
+
+define void @cold_callee(i32 %0) !prof !52 {
+  %2 = load i32, ptr @cold_bss
+  %3 = load i32, ptr @cold_data
+  %4 = srem i32 %0, 2
+  %5 = sext i32 %4 to i64
+  %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
+  %7 = load ptr, ptr %6
+  %8 = load i32, ptr %7
+  %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8)
+  ret void
+}
+
+define i32 @main(i32 %0, ptr %1) !prof !52 {
+  %3 = call i64 @time(ptr null)
+  %4 = trunc i64 %3 to i32
+  call void @srand(i32 %4)
+  br label %11
+
+5:                                                ; preds = %11
+  %6 = call i32 @rand()
+  store i32 %6, ptr @cold_bss
+  store i32 %6, ptr @cold_data
+  store i32 %6, ptr @_ZL4bss2
+  store i32 %6, ptr @_ZL5data3
+  call void @cold_callee(i32 %6)
+  ret i32 0
+
+11:                                               ; preds = %11, %2
+  %12 = phi i32 [ 0, %2 ], [ %19, %11 ]
+  %13 = call i32 @rand()
+  %14 = srem i32 %13, 2
+  %15 = sext i32 %14 to i64
+  %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15
+  %17 = load ptr, ptr %16
+  store i32 %13, ptr %17
+  store i32 %13, ptr @hot_data
+  %18 = add i32 %13, 1
+  store i32 %18, ptr @hot_bss
+  call void @hot_callee(i32 %12)
+  %19 = add i32 %12, 1
+  %20 = icmp eq i32 %19, 100000
+  br i1 %20, label %5, label %11, !prof !53
+}
+
+declare void @srand(i32)
+declare i64 @time(ptr)
+declare i32 @rand()
+declare i32 @printf(ptr, ...)
+
+!llvm.module.flags = !{!12}
+
+!12 = !{i32 1, !"ProfileSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !23}
+!14 = !{!"ProfileFormat", !"InstrProf"}
+!15 = !{!"TotalCount", i64 1460183}
+!16 = !{!"MaxCount", i64 849024}
+!17 = !{!"MaxInternalCount", i64 32769}
+!18 = !{!"MaxFunctionCount", i64 849024}
+!19 = !{!"NumCounts", i64 23627}
+!20 = !{!"NumFunctions", i64 3271}
+!23 = !{!"DetailedSummary", !24}
+!24 = !{!36, !40}
+!36 = !{i32 990000, i64 166, i32 73}
+!40 = !{i32 999999, i64 1, i32 1443}
+!51 = !{!"function_entry_count", i64 100000}
+!52 = !{!"function_entry_count", i64 1}
+!53 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
index 58af88d8cf3653d..48d02e5cebc69c8 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
@@ -34,8 +34,8 @@ define void @cold_func() !prof !16 {
   ret void
 }
 
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
index 6dad1122e429455..4baa0b5baa4be5d 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
@@ -66,8 +66,8 @@ define void @cold_func3() !prof !16 {
   ret void
 }
 
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
index 6e5ef1aa253921f..1e8825e651ec4c8 100644
--- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
+++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
@@ -27,9 +27,9 @@ declare void @sink() cold
 ; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]]
 
 ; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100}
-; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !"hot"}
+; CHECK: ![[LIKELY]] = !{!"section_prefix", !"hot"}
 ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[UNLIKELY]] = !{!"section_prefix", !"unlikely"}
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
index ef2ddbc33cee4c5..af4b875818f6fed 100644
--- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
+++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
@@ -36,11 +36,11 @@ attributes #1 = { "use-sample-profile" }
 
 ; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
 ; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 ; UNKNOWN: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
-; UNKNOWN: ![[UNKNOWN_ID]] = !{!"function_section_prefix", !"unknown"}
+; UNKNOWN: ![[UNKNOWN_ID]] = !{!"section_prefix", !"unknown"}
 ; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; ACCURATE: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}

>From 93d9881754b4713a6202011a2e1ffe520cf80367 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 12:13:52 -0800
Subject: [PATCH 2/4] add comment for bss2 and data3

---
 .../CodeGen/X86/global-variable-partition.ll  | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index d457d766999e246..bb77f3362406bdc 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -50,8 +50,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; SYM: .section	.bss.unlikely.cold_bss,"aw",@nobits
 ; SYM: .section	.data.unlikely.cold_data,"aw",@progbits
 ; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
-; SYM: .section	.bss.unlikely._ZL4bss2,"aw",@nobits
-; SYM: .section	.data.unlikely._ZL5data3,"aw",@progbits
+; SYM: .section	.bss.unlikely.bss2,"aw",@nobits
+; SYM: .section	.data.unlikely.data3,"aw",@progbits
 
 ; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,6
 ; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,7
@@ -67,15 +67,21 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
-@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
 @hot_data = internal global i32 5
 @hot_bss = internal global i32 0
 @.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
 @cold_bss = internal global i32 0
 @cold_data = internal global i32 4
-@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
-@_ZL4bss2 = internal global i32 0
-@_ZL5data3 = internal global i32 3
+@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
+
+; COM: Currently static-data-splitter only analyzes access from code.
+; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; COM: and @cold_relro_array.
+; COM: A follow-up item is to analyze access from data and prune the unlikely
+; COM: list.
+@bss2 = internal global i32 0
+@data3 = internal global i32 3
 
 define void @hot_callee(i32 %0) !prof !51 {
   %2 = call i32 (ptr, ...) @printf(ptr @.str)
@@ -112,8 +118,8 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
   %6 = call i32 @rand()
   store i32 %6, ptr @cold_bss
   store i32 %6, ptr @cold_data
-  store i32 %6, ptr @_ZL4bss2
-  store i32 %6, ptr @_ZL5data3
+  store i32 %6, ptr @bss2
+  store i32 %6, ptr @data3
   call void @cold_callee(i32 %6)
   ret i32 0
 

>From 8f21570c7d9dd528ad56ff2eefe85af8125ba99c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:20:12 -0800
Subject: [PATCH 3/4] apply code review suggestions

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e6e20db0724a57a..0716f6dac27f1d2 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -123,11 +123,10 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        std::optional<uint64_t> Count = std::nullopt;
         if (!Op.isJTI() && !Op.isGlobal())
           continue;
 
-        Count = MBFI->getBlockProfileCount(&MBB);
+        std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
 
         if (Op.isJTI()) {
           assert(MJTI != nullptr && "Jump table info is not available.");

>From f07d34d0d02bf9a8c46364cf4e1e605d3b626b92 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:48:39 -0800
Subject: [PATCH 4/4] record global variable section prefix updates as module
 updates

---
 llvm/include/llvm/IR/GlobalObject.h     |  2 +-
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 13 +++++++++----
 llvm/lib/IR/Globals.cpp                 |  6 +++---
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index bb50c39813e1407..400ea6a1a7fca5b 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -129,7 +129,7 @@ class GlobalObject : public GlobalValue {
 
   /// Update the section prefix, unless the existing prefix is the same as
   /// `KeepPrefix`.
-  void updateSectionPrefix(StringRef Prefix,
+  bool updateSectionPrefix(StringRef Prefix,
                            std::optional<StringRef> KeepPrefix = std::nullopt);
 
   /// Get the section prefix for this global object.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 0716f6dac27f1d2..bbe3f9ff8cbd36a 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -59,7 +59,7 @@ class StaticDataSplitter : public MachineFunctionPass {
 
   // Iterate all global variables in the module and update the section prefix
   // of the module-internal data.
-  void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+  bool updateGlobalVariableSectionPrefix(MachineFunction &MF);
 
   // Accummulated data profile count across machine functions in the module.
   DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
@@ -105,6 +105,8 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
 
   bool Changed = partitionStaticDataWithProfiles(MF);
 
+  Changed |= updateGlobalVariableSectionPrefix(MF);
+
   updateStatsWithProfiles(MF);
   return Changed;
 }
@@ -188,8 +190,9 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
          Kind.isBSS();
 }
 
-void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+bool StaticDataSplitter::updateGlobalVariableSectionPrefix(
     MachineFunction &MF) {
+  bool Changed = false;
   for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
     if (GV.isDeclarationForLinker())
       continue;
@@ -213,11 +216,13 @@ void StaticDataSplitter::updateGlobalVariableSectionPrefix(
     // framework, and set global variable section prefix once per module after
     // analyzing all machine functions.
     if (PSI->isColdCount(Iter->second.getZExtValue())) {
-      GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+      Changed |= GV.updateSectionPrefix("unlikely",
+                                        std::make_optional(StringRef("hot")));
     } else if (PSI->isHotCount(Iter->second.getZExtValue())) {
-      GV.updateSectionPrefix("hot");
+      Changed |= GV.updateSectionPrefix("hot");
     }
   }
+  return Changed;
 }
 
 void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 884089262e4659d..5baf854cd552ba3 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -293,15 +293,15 @@ void GlobalObject::setSectionPrefix(StringRef Prefix) {
               MDB.createGlobalObjectSectionPrefix(Prefix));
 }
 
-void GlobalObject::updateSectionPrefix(StringRef Prefix,
+bool GlobalObject::updateSectionPrefix(StringRef Prefix,
                                        std::optional<StringRef> KeepPrefix) {
   auto SectionPrefix = getSectionPrefix();
   if (SectionPrefix && (*SectionPrefix == Prefix ||
                         (KeepPrefix && *SectionPrefix == *KeepPrefix)))
-    return;
+    return false;
 
   setSectionPrefix(Prefix);
-  return;
+  return true;
 }
 
 std::optional<StringRef> GlobalObject::getSectionPrefix() const {



More information about the llvm-commits mailing list