[llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 29 21:28:03 PDT 2025


https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781

>From 8eea1ea0109f07d590d189bc17d5f5411556d77a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 11:19:44 -0800
Subject: [PATCH 01/12] [CodeGen][StaticDataPartitioning]Place module-internal
 global variables based on profile information

---
 llvm/include/llvm/IR/Function.h               |   6 -
 llvm/include/llvm/IR/GlobalObject.h           |  11 +
 llvm/include/llvm/IR/MDBuilder.h              |   4 +-
 llvm/lib/CodeGen/StaticDataSplitter.cpp       | 201 +++++++++++++-----
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |   6 +
 llvm/lib/IR/Function.cpp                      |  16 --
 llvm/lib/IR/Globals.cpp                       |  30 +++
 llvm/lib/IR/MDBuilder.cpp                     |   6 +-
 llvm/test/CodeGen/X86/data-section-prefix.ll  |  27 +++
 .../CodeGen/X86/global-variable-partition.ll  | 159 ++++++++++++++
 .../CodeGenPrepare/X86/section-samplepgo.ll   |   4 +-
 .../Transforms/CodeGenPrepare/X86/section.ll  |   4 +-
 .../Transforms/HotColdSplit/coldentrycount.ll |   4 +-
 .../section-accurate-samplepgo.ll             |   6 +-
 14 files changed, 396 insertions(+), 88 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/data-section-prefix.ll
 create mode 100644 llvm/test/CodeGen/X86/global-variable-partition.ll

diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fcd5396ccfdbc..29041688124bc 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -346,12 +346,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
   /// sample PGO, to enable the same inlines as the profiled optimized binary.
   DenseSet<GlobalValue::GUID> getImportGUIDs() const;
 
-  /// Set the section prefix for this function.
-  void setSectionPrefix(StringRef Prefix);
-
-  /// Get the section prefix for this function.
-  std::optional<StringRef> getSectionPrefix() const;
-
   /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
   ///                             to use during code generation.
   bool hasGC() const {
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 08edc13d81f88..bb50c39813e14 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -124,6 +124,17 @@ class GlobalObject : public GlobalValue {
   /// appropriate default object file section.
   void setSection(StringRef S);
 
+  /// Set the section prefix for this global object.
+  void setSectionPrefix(StringRef Prefix);
+
+  /// Update the section prefix, unless the existing prefix is the same as
+  /// `KeepPrefix`.
+  void updateSectionPrefix(StringRef Prefix,
+                           std::optional<StringRef> KeepPrefix = std::nullopt);
+
+  /// Get the section prefix for this global object.
+  std::optional<StringRef> getSectionPrefix() const;
+
   bool hasComdat() const { return getComdat() != nullptr; }
   const Comdat *getComdat() const { return ObjComdat; }
   Comdat *getComdat() { return ObjComdat; }
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index e02ec8f5a3d8b..ce4e1da656049 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -89,8 +89,8 @@ class MDBuilder {
   MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
                                    const DenseSet<GlobalValue::GUID> *Imports);
 
-  /// Return metadata containing the section prefix for a function.
-  MDNode *createFunctionSectionPrefix(StringRef Prefix);
+  /// Return metadata containing the section prefix for a global object.
+  MDNode *createGlobalObjectSectionPrefix(StringRef Prefix);
 
   /// Return metadata containing the pseudo probe descriptor for a function.
   MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName);
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e5bf0a5a3a255..f09e3b41e0723 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -9,13 +9,13 @@
 // The pass uses branch profile data to assign hotness based section qualifiers
 // for the following types of static data:
 // - Jump tables
+// - Module-internal global variables
 // - Constant pools (TODO)
-// - Other module-internal data (TODO)
 //
 // For the original RFC of this pass please see
 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
 
-#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
@@ -27,9 +27,12 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
 
@@ -46,12 +49,27 @@ class StaticDataSplitter : public MachineFunctionPass {
   const MachineBlockFrequencyInfo *MBFI = nullptr;
   const ProfileSummaryInfo *PSI = nullptr;
 
-  // Returns true iff any jump table is hot-cold categorized.
-  bool splitJumpTables(MachineFunction &MF);
+  void updateStats(bool ProfileAvailable, const MachineJumpTableInfo *MJTI);
+  void updateJumpTableStats(bool ProfileAvailable,
+                            const MachineJumpTableInfo &MJTI);
 
-  // Same as above but works on functions with profile information.
-  bool splitJumpTablesWithProfiles(const MachineFunction &MF,
-                                   MachineJumpTableInfo &MJTI);
+  // Use profiles to partition static data.
+  bool partitionStaticDataWithProfiles(MachineFunction &MF);
+
+  // If the global value is a local linkage global variable, return it.
+  // Otherwise, return nullptr.
+  const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV);
+
+  // Returns true if the global variable is in one of {.rodata, .bss, .data,
+  // .data.rel.ro} sections
+  bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
+
+  // Iterate all global variables in the module and update the section prefix
+  // of the module-internal data.
+  void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+
+  // Accummulated data profile count across machine functions in the module.
+  DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
 
 public:
   static char ID;
@@ -77,13 +95,24 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
-  return splitJumpTables(MF);
+  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+                                MF.getFunction().hasProfileData();
+  bool Changed = false;
+
+  if (ProfileAvailable)
+    Changed |= partitionStaticDataWithProfiles(MF);
+
+  updateGlobalVariableSectionPrefix(MF);
+  updateStats(ProfileAvailable, MF.getJumpTableInfo());
+  return Changed;
 }
 
-bool StaticDataSplitter::splitJumpTablesWithProfiles(
-    const MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
+  const TargetMachine &TM = MF.getTarget();
+  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
   // `MachineOperand::isJTI()` to identify jump table operands.
@@ -92,63 +121,131 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isJTI())
-          continue;
-        const int JTI = Op.getIndex();
-        // This is not a source block of jump table.
-        if (JTI == -1)
+        std::optional<uint64_t> Count = std::nullopt;
+        if (!Op.isJTI() && !Op.isGlobal())
           continue;
 
-        auto Hotness = MachineFunctionDataHotness::Hot;
+        Count = MBFI->getBlockProfileCount(&MBB);
+
+        if (Op.isJTI()) {
+          assert(MJTI != nullptr && "Jump table info is not available.");
+          const int JTI = Op.getIndex();
+          // This is not a source block of jump table.
+          if (JTI == -1)
+            continue;
+
+          auto Hotness = MachineFunctionDataHotness::Hot;
+
+          // Hotness is based on source basic block hotness.
+          // TODO: PSI APIs are about instruction hotness. Introduce API for
+          // data access hotness.
+          if (Count && PSI->isColdCount(*Count))
+            Hotness = MachineFunctionDataHotness::Cold;
 
-        // Hotness is based on source basic block hotness.
-        // TODO: PSI APIs are about instruction hotness. Introduce API for data
-        // access hotness.
-        if (PSI->isColdBlock(&MBB, MBFI))
-          Hotness = MachineFunctionDataHotness::Cold;
+          if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
+            ++NumChangedJumpTables;
+        } else if (Op.isGlobal()) {
+          // Find global variables with local linkage
+          const GlobalVariable *GV =
+              getLocalLinkageGlobalVariable(Op.getGlobal());
+          if (!GV || !inStaticDataSection(GV, TM))
+            continue;
 
-        if (MJTI.updateJumpTableEntryHotness(JTI, Hotness))
-          ++NumChangedJumpTables;
+          // Acccumulate data profile count across machine function
+          // instructions.
+          // TODO: Analyze global variable's initializers.
+          if (Count) {
+            auto [It, Inserted] =
+                DataProfileCounts.try_emplace(GV, APInt(128, 0));
+            It->second += *Count;
+          }
+        }
       }
     }
   }
   return NumChangedJumpTables > 0;
 }
 
-bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
-  MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  if (!MJTI || MJTI->getJumpTables().empty())
-    return false;
-
-  const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
-                                MF.getFunction().hasProfileData();
-  auto statOnExit = llvm::make_scope_exit([&] {
-    if (!AreStatisticsEnabled())
-      return;
+void StaticDataSplitter::updateJumpTableStats(
+    bool ProfileAvailable, const MachineJumpTableInfo &MJTI) {
+  if (!ProfileAvailable) {
+    NumUnknownJumpTables += MJTI.getJumpTables().size();
+    return;
+  }
 
-    if (!ProfileAvailable) {
-      NumUnknownJumpTables += MJTI->getJumpTables().size();
-      return;
+  for (size_t JTI = 0; JTI < MJTI.getJumpTables().size(); JTI++) {
+    auto Hotness = MJTI.getJumpTables()[JTI].Hotness;
+    if (Hotness == MachineFunctionDataHotness::Hot) {
+      ++NumHotJumpTables;
+    } else {
+      assert(Hotness == MachineFunctionDataHotness::Cold &&
+             "A jump table is either hot or cold when profile information is "
+             "available.");
+      ++NumColdJumpTables;
     }
+  }
+}
 
-    for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
-      auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
-      if (Hotness == MachineFunctionDataHotness::Hot) {
-        ++NumHotJumpTables;
-      } else {
-        assert(Hotness == MachineFunctionDataHotness::Cold &&
-               "A jump table is either hot or cold when profile information is "
-               "available.");
-        ++NumColdJumpTables;
-      }
-    }
-  });
+void StaticDataSplitter::updateStats(bool ProfileAvailable,
+                                     const MachineJumpTableInfo *MJTI) {
+  if (!AreStatisticsEnabled())
+    return;
 
-  // Place jump tables according to block hotness if function has profile data.
-  if (ProfileAvailable)
-    return splitJumpTablesWithProfiles(MF, *MJTI);
+  if (MJTI)
+    updateJumpTableStats(ProfileAvailable, *MJTI);
+}
 
-  return true;
+const GlobalVariable *
+StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) {
+  if (!GV || GV->isDeclarationForLinker())
+    return nullptr;
+
+  return GV->hasLocalLinkage() ? dyn_cast<GlobalVariable>(GV) : nullptr;
+}
+
+bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
+                                             const TargetMachine &TM) {
+  assert(GV && "Caller guaranteed");
+
+  // Skip LLVM reserved symbols.
+  if (GV->getName().starts_with("llvm."))
+    return false;
+
+  SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+  return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() ||
+         Kind.isBSS();
+}
+
+void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+    MachineFunction &MF) {
+  for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
+    if (GV.isDeclarationForLinker())
+      continue;
+    // DataProfileCounts accumulates data profile count across all machine
+    // function instructions, and it can't model the indirect accesses through
+    // other global variables' initializers.
+    // TODO: Analyze the users of module-internal global variables and see
+    // through the users' initializers. Do not place a global variable into
+    // unlikely section if any of its users are potentially hot.
+    auto Iter = DataProfileCounts.find(&GV);
+    if (Iter == DataProfileCounts.end())
+      continue;
+
+    // StaticDataSplitter is made a machine function pass rather than a module
+    // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function
+    // analysis pass and cannot be used for a legacy module pass.
+    // As a result, we use `DataProfileCounts` to accumulate data
+    // profile count across machine functions and update global variable section
+    // prefix once per machine function.
+    // FIXME: Make StaticDataSplitter a module pass under new pass manager
+    // framework, and set global variable section prefix once per module after
+    // analyzing all machine functions.
+    if (PSI->isColdCount(Iter->second.getZExtValue())) {
+      GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+    } else if (PSI->isHotCount(Iter->second.getZExtValue())) {
+      GV.updateSectionPrefix("hot");
+    }
+  }
 }
 
 char StaticDataSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3c2c7c8c9fed6..d20ab29cc1979 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -670,6 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
   }
 
   bool HasPrefix = false;
+
   if (const auto *F = dyn_cast<Function>(GO)) {
     // Jump table hotness takes precedence over its enclosing function's hotness
     // if it's known. The function's section prefix is used if jump table entry
@@ -687,6 +688,11 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
       raw_svector_ostream(Name) << '.' << *Prefix;
       HasPrefix = true;
     }
+  } else if (const auto *GV = dyn_cast<GlobalVariable>(GO)) {
+    if (std::optional<StringRef> Prefix = GV->getSectionPrefix()) {
+      raw_svector_ostream(Name) << '.' << *Prefix;
+      HasPrefix = true;
+    }
   }
 
   if (UniqueSectionName) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index e6f0d64d071ba..5666f0a53866f 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1164,22 +1164,6 @@ DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
   return R;
 }
 
-void Function::setSectionPrefix(StringRef Prefix) {
-  MDBuilder MDB(getContext());
-  setMetadata(LLVMContext::MD_section_prefix,
-              MDB.createFunctionSectionPrefix(Prefix));
-}
-
-std::optional<StringRef> Function::getSectionPrefix() const {
-  if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
-    assert(cast<MDString>(MD->getOperand(0))->getString() ==
-               "function_section_prefix" &&
-           "Metadata not match");
-    return cast<MDString>(MD->getOperand(1))->getString();
-  }
-  return std::nullopt;
-}
-
 bool Function::nullPointerIsDefined() const {
   return hasFnAttribute(Attribute::NullPointerIsValid);
 }
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index db5e1cb57b1ba..884089262e465 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -286,6 +287,35 @@ void GlobalObject::setSection(StringRef S) {
   setGlobalObjectFlag(HasSectionHashEntryBit, !S.empty());
 }
 
+void GlobalObject::setSectionPrefix(StringRef Prefix) {
+  MDBuilder MDB(getContext());
+  setMetadata(LLVMContext::MD_section_prefix,
+              MDB.createGlobalObjectSectionPrefix(Prefix));
+}
+
+void GlobalObject::updateSectionPrefix(StringRef Prefix,
+                                       std::optional<StringRef> KeepPrefix) {
+  auto SectionPrefix = getSectionPrefix();
+  if (SectionPrefix && (*SectionPrefix == Prefix ||
+                        (KeepPrefix && *SectionPrefix == *KeepPrefix)))
+    return;
+
+  setSectionPrefix(Prefix);
+  return;
+}
+
+std::optional<StringRef> GlobalObject::getSectionPrefix() const {
+  if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
+    [[maybe_unused]] StringRef MDName =
+        cast<MDString>(MD->getOperand(0))->getString();
+    assert((MDName == "section_prefix" ||
+            (isa<Function>(this) && MDName == "function_section_prefix")) &&
+           "Metadata not match");
+    return cast<MDString>(MD->getOperand(1))->getString();
+  }
+  return std::nullopt;
+}
+
 bool GlobalValue::isNobuiltinFnDef() const {
   const Function *F = dyn_cast<Function>(this);
   if (!F || F->empty())
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 26c8ab9fc36c8..b6aa8844a7eaf 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -87,9 +87,9 @@ MDNode *MDBuilder::createFunctionEntryCount(
   return MDNode::get(Context, Ops);
 }
 
-MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
-  return MDNode::get(
-      Context, {createString("function_section_prefix"), createString(Prefix)});
+MDNode *MDBuilder::createGlobalObjectSectionPrefix(StringRef Prefix) {
+  return MDNode::get(Context,
+                     {createString("section_prefix"), createString(Prefix)});
 }
 
 MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
diff --git a/llvm/test/CodeGen/X86/data-section-prefix.ll b/llvm/test/CodeGen/X86/data-section-prefix.ll
new file mode 100644
index 0000000000000..4812fc70758fb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/data-section-prefix.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-linux-gnu -data-sections %s -o - | FileCheck %s --check-prefix=ELF
+; RUN: llc -mtriple x86_64-linux-gnu -unique-section-names=0 -data-sections %s -o - | FileCheck %s --check-prefix=ELF-NOUNIQ
+
+; RUN: llc -mtriple x86_64-windows-msvc -data-sections %s -o - | FileCheck %s --check-prefix=COFF-MSVC
+
+; ELF: .section .data.hot.foo,
+; ELF: .section .data.bar,
+; ELF: .section .bss.unlikely.baz,
+; ELF: .section .bss.quz,
+
+; ELF-NOUNIQ: .section    .data.hot.,"aw",@progbits,unique,1
+; ELF-NOUNIQ: .section    .data,"aw",@progbits,unique,2
+; ELF-NOUNIQ: .section    .bss.unlikely.,"aw",@nobits,unique,3
+; ELF-NOUNIQ: .section    .bss,"aw",@nobits,unique,4
+
+; COFF-MSVC: .section .data,"dw",one_only,foo
+; COFF-MSVC: .section .data,"dw",one_only,bar
+; COFF-MSVC: .section .bss,"bw",one_only,baz
+; COFF-MSVC: .section .bss,"bw",one_only,quz
+
+@foo = global i32 1, !section_prefix !0
+@bar = global i32 2
+@baz = global i32 0, !section_prefix !1
+@quz = global i32 0
+
+!0 = !{!"section_prefix", !"hot"}
+!1 = !{!"section_prefix", !"unlikely"}
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
new file mode 100644
index 0000000000000..d457d766999e2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -0,0 +1,159 @@
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=true -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=true \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -data-sections=false \
+; RUN:     -unique-section-names=false -relocation-model=pic \
+; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA
+
+; SYM: .section .rodata.str1.1.hot.
+; UNIQ: .section	.rodata.str1.1.hot.,"aMS",@progbits,1
+; AGG: .section	.rodata.str1.1.hot
+; DATA: .L.str
+; DATA:    "hot\t"
+; DATA: .L.str.1
+; DATA:    "%d\t%d\t%d\n"
+
+
+; SYM:  .section	.data.rel.ro.hot.hot_relro_array
+; SYM: .section	.data.hot.hot_data,"aw",@progbits
+; SYM: .section	.bss.hot.hot_bss,"aw",@nobits
+
+; UNIQ: .section	.data.rel.ro.hot.,"aw",@progbits,unique,3
+; UNIQ: .section	.data.hot.,"aw",@progbits,unique,4
+; UNIQ: .section	.bss.hot.,"aw",@nobits,unique,5
+
+; AGG: .section	.data.rel.ro.hot.,"aw",@progbits
+; AGG: .section	.data.hot.,"aw",@progbits
+; AGG: .section .bss.hot.,"aw",@nobits
+
+
+; SYM: .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; UNIQ: section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; AGG: .section	.rodata.str1.1.unlikely.,"aMS",@progbits,1
+; DATA: .L.str.2:
+; DATA:    "cold%d\t%d\t%d\n"
+
+
+; SYM: .section	.bss.unlikely.cold_bss,"aw",@nobits
+; SYM: .section	.data.unlikely.cold_data,"aw",@progbits
+; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
+; SYM: .section	.bss.unlikely._ZL4bss2,"aw",@nobits
+; SYM: .section	.data.unlikely._ZL5data3,"aw",@progbits
+
+; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,6
+; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,7
+; UNIQ: .section	.data.rel.ro.unlikely.,"aw",@progbits,unique,8
+; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,9
+; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,10
+
+; AGG: .section	.bss.unlikely.,"aw",@nobits
+; AGG: .section	.data.unlikely.,"aw",@progbits
+; AGG: .section	.data.rel.ro.unlikely.,"aw",@progbits
+; AGG: .section	.bss.unlikely.,"aw",@nobits
+; AGG: .section	.data.unlikely.,"aw",@progbits
+
+@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
+@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
+@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_data = internal global i32 5
+@hot_bss = internal global i32 0
+@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
+@cold_bss = internal global i32 0
+@cold_data = internal global i32 4
+@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
+@_ZL4bss2 = internal global i32 0
+@_ZL5data3 = internal global i32 3
+
+define void @hot_callee(i32 %0) !prof !51 {
+  %2 = call i32 (ptr, ...) @printf(ptr @.str)
+  %3 = srem i32 %0, 2
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+  %6 = load ptr, ptr %5
+  %7 = load i32, ptr %6
+  %8 = load i32, ptr @hot_data
+  %9 = load i32, ptr @hot_bss
+  %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9)
+  ret void
+}
+
+define void @cold_callee(i32 %0) !prof !52 {
+  %2 = load i32, ptr @cold_bss
+  %3 = load i32, ptr @cold_data
+  %4 = srem i32 %0, 2
+  %5 = sext i32 %4 to i64
+  %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
+  %7 = load ptr, ptr %6
+  %8 = load i32, ptr %7
+  %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8)
+  ret void
+}
+
+define i32 @main(i32 %0, ptr %1) !prof !52 {
+  %3 = call i64 @time(ptr null)
+  %4 = trunc i64 %3 to i32
+  call void @srand(i32 %4)
+  br label %11
+
+5:                                                ; preds = %11
+  %6 = call i32 @rand()
+  store i32 %6, ptr @cold_bss
+  store i32 %6, ptr @cold_data
+  store i32 %6, ptr @_ZL4bss2
+  store i32 %6, ptr @_ZL5data3
+  call void @cold_callee(i32 %6)
+  ret i32 0
+
+11:                                               ; preds = %11, %2
+  %12 = phi i32 [ 0, %2 ], [ %19, %11 ]
+  %13 = call i32 @rand()
+  %14 = srem i32 %13, 2
+  %15 = sext i32 %14 to i64
+  %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15
+  %17 = load ptr, ptr %16
+  store i32 %13, ptr %17
+  store i32 %13, ptr @hot_data
+  %18 = add i32 %13, 1
+  store i32 %18, ptr @hot_bss
+  call void @hot_callee(i32 %12)
+  %19 = add i32 %12, 1
+  %20 = icmp eq i32 %19, 100000
+  br i1 %20, label %5, label %11, !prof !53
+}
+
+declare void @srand(i32)
+declare i64 @time(ptr)
+declare i32 @rand()
+declare i32 @printf(ptr, ...)
+
+!llvm.module.flags = !{!12}
+
+!12 = !{i32 1, !"ProfileSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !23}
+!14 = !{!"ProfileFormat", !"InstrProf"}
+!15 = !{!"TotalCount", i64 1460183}
+!16 = !{!"MaxCount", i64 849024}
+!17 = !{!"MaxInternalCount", i64 32769}
+!18 = !{!"MaxFunctionCount", i64 849024}
+!19 = !{!"NumCounts", i64 23627}
+!20 = !{!"NumFunctions", i64 3271}
+!23 = !{!"DetailedSummary", !24}
+!24 = !{!36, !40}
+!36 = !{i32 990000, i64 166, i32 73}
+!40 = !{i32 999999, i64 1, i32 1443}
+!51 = !{!"function_entry_count", i64 100000}
+!52 = !{!"function_entry_count", i64 1}
+!53 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
index 58af88d8cf365..48d02e5cebc69 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
@@ -34,8 +34,8 @@ define void @cold_func() !prof !16 {
   ret void
 }
 
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
index 6dad1122e4294..4baa0b5baa4be 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
@@ -66,8 +66,8 @@ define void @cold_func3() !prof !16 {
   ret void
 }
 
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
index 6e5ef1aa25392..1e8825e651ec4 100644
--- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
+++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
@@ -27,9 +27,9 @@ declare void @sink() cold
 ; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]]
 
 ; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100}
-; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !"hot"}
+; CHECK: ![[LIKELY]] = !{!"section_prefix", !"hot"}
 ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[UNLIKELY]] = !{!"section_prefix", !"unlikely"}
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
index ef2ddbc33cee4..af4b875818f6f 100644
--- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
+++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
@@ -36,11 +36,11 @@ attributes #1 = { "use-sample-profile" }
 
 ; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
 ; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 ; UNKNOWN: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
-; UNKNOWN: ![[UNKNOWN_ID]] = !{!"function_section_prefix", !"unknown"}
+; UNKNOWN: ![[UNKNOWN_ID]] = !{!"section_prefix", !"unknown"}
 ; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; ACCURATE: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
 !llvm.module.flags = !{!1}
 !1 = !{i32 1, !"ProfileSummary", !2}
 !2 = !{!3, !4, !5, !6, !7, !8, !9, !10}

>From 93d9881754b4713a6202011a2e1ffe520cf80367 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 12:13:52 -0800
Subject: [PATCH 02/12] add comment for bss22 and data3

---
 .../CodeGen/X86/global-variable-partition.ll  | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index d457d766999e2..bb77f3362406b 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -50,8 +50,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; SYM: .section	.bss.unlikely.cold_bss,"aw",@nobits
 ; SYM: .section	.data.unlikely.cold_data,"aw",@progbits
 ; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
-; SYM: .section	.bss.unlikely._ZL4bss2,"aw",@nobits
-; SYM: .section	.data.unlikely._ZL5data3,"aw",@progbits
+; SYM: .section	.bss.unlikely.bss2,"aw",@nobits
+; SYM: .section	.data.unlikely.data3,"aw",@progbits
 
 ; UNIQ: .section	.bss.unlikely.,"aw",@nobits,unique,6
 ; UNIQ: .section	.data.unlikely.,"aw",@progbits,unique,7
@@ -67,15 +67,21 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
-@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
 @hot_data = internal global i32 5
 @hot_bss = internal global i32 0
 @.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
 @cold_bss = internal global i32 0
 @cold_data = internal global i32 4
-@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
-@_ZL4bss2 = internal global i32 0
-@_ZL5data3 = internal global i32 3
+@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
+
+; COM: Currently static-data-splitter only analyzes access from code.
+; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; COM: and @cold_relro_array.
+; COM: A follow-up item is to analyze access from data and prune the unlikely
+; COM: list.
+@bss2 = internal global i32 0
+@data3 = internal global i32 3
 
 define void @hot_callee(i32 %0) !prof !51 {
   %2 = call i32 (ptr, ...) @printf(ptr @.str)
@@ -112,8 +118,8 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
   %6 = call i32 @rand()
   store i32 %6, ptr @cold_bss
   store i32 %6, ptr @cold_data
-  store i32 %6, ptr @_ZL4bss2
-  store i32 %6, ptr @_ZL5data3
+  store i32 %6, ptr @bss2
+  store i32 %6, ptr @data3
   call void @cold_callee(i32 %6)
   ret i32 0
 

>From 8f21570c7d9dd528ad56ff2eefe85af8125ba99c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:20:12 -0800
Subject: [PATCH 03/12] apply code review suggestions

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e6e20db0724a5..0716f6dac27f1 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -123,11 +123,10 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        std::optional<uint64_t> Count = std::nullopt;
         if (!Op.isJTI() && !Op.isGlobal())
           continue;
 
-        Count = MBFI->getBlockProfileCount(&MBB);
+        std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
 
         if (Op.isJTI()) {
           assert(MJTI != nullptr && "Jump table info is not available.");

>From f07d34d0d02bf9a8c46364cf4e1e605d3b626b92 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:48:39 -0800
Subject: [PATCH 04/12] record global variable section prefix updates as module
 updates

---
 llvm/include/llvm/IR/GlobalObject.h     |  2 +-
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 13 +++++++++----
 llvm/lib/IR/Globals.cpp                 |  6 +++---
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index bb50c39813e14..400ea6a1a7fca 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -129,7 +129,7 @@ class GlobalObject : public GlobalValue {
 
   /// Update the section prefix, unless the existing prefix is the same as
   /// `KeepPrefix`.
-  void updateSectionPrefix(StringRef Prefix,
+  bool updateSectionPrefix(StringRef Prefix,
                            std::optional<StringRef> KeepPrefix = std::nullopt);
 
   /// Get the section prefix for this global object.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 0716f6dac27f1..bbe3f9ff8cbd3 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -59,7 +59,7 @@ class StaticDataSplitter : public MachineFunctionPass {
 
   // Iterate all global variables in the module and update the section prefix
   // of the module-internal data.
-  void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+  bool updateGlobalVariableSectionPrefix(MachineFunction &MF);
 
   // Accummulated data profile count across machine functions in the module.
   DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
@@ -105,6 +105,8 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
 
   bool Changed = partitionStaticDataWithProfiles(MF);
 
+  Changed |= updateGlobalVariableSectionPrefix(MF);
+
   updateStatsWithProfiles(MF);
   return Changed;
 }
@@ -188,8 +190,9 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
          Kind.isBSS();
 }
 
-void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+bool StaticDataSplitter::updateGlobalVariableSectionPrefix(
     MachineFunction &MF) {
+  bool Changed = false;
   for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
     if (GV.isDeclarationForLinker())
       continue;
@@ -213,11 +216,13 @@ void StaticDataSplitter::updateGlobalVariableSectionPrefix(
     // framework, and set global variable section prefix once per module after
     // analyzing all machine functions.
     if (PSI->isColdCount(Iter->second.getZExtValue())) {
-      GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+      Changed |= GV.updateSectionPrefix("unlikely",
+                                        std::make_optional(StringRef("hot")));
     } else if (PSI->isHotCount(Iter->second.getZExtValue())) {
-      GV.updateSectionPrefix("hot");
+      Changed |= GV.updateSectionPrefix("hot");
     }
   }
+  return Changed;
 }
 
 void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 884089262e465..5baf854cd552b 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -293,15 +293,15 @@ void GlobalObject::setSectionPrefix(StringRef Prefix) {
               MDB.createGlobalObjectSectionPrefix(Prefix));
 }
 
-void GlobalObject::updateSectionPrefix(StringRef Prefix,
+bool GlobalObject::updateSectionPrefix(StringRef Prefix,
                                        std::optional<StringRef> KeepPrefix) {
   auto SectionPrefix = getSectionPrefix();
   if (SectionPrefix && (*SectionPrefix == Prefix ||
                         (KeepPrefix && *SectionPrefix == *KeepPrefix)))
-    return;
+    return false;
 
   setSectionPrefix(Prefix);
-  return;
+  return true;
 }
 
 std::optional<StringRef> GlobalObject::getSectionPrefix() const {

>From 4a2a881a66bc99ba28609c05c1a03e432d16694c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 7 Feb 2025 17:46:52 -0800
Subject: [PATCH 05/12] remove blank line

---
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d20ab29cc1979..6cbc4b9776a1b 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -670,7 +670,6 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
   }
 
   bool HasPrefix = false;
-
   if (const auto *F = dyn_cast<Function>(GO)) {
     // Jump table hotness takes precedence over its enclosing function's hotness
     // if it's known. The function's section prefix is used if jump table entry

>From 1f50494cd83b2c222191353050a955dd36beb610 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 24 Feb 2025 15:51:49 -0800
Subject: [PATCH 06/12] Implement module-wide analysis of global variable
 hotness. * In StaticDataProfileInfo.h/cpp, add an immutable pass to keep
 track of   constants and their profile information across functions in a
 module. * Add a module pass, StaticDataAnnotator, to set global variable's  
 section prefix based on module-wide hotness.

---
 .../llvm/Analysis/StaticDataProfileInfo.h     |  68 +++++++
 llvm/include/llvm/CodeGen/Passes.h            |   9 +-
 llvm/include/llvm/InitializePasses.h          |   2 +
 .../llvm/Passes/MachinePassRegistry.def       |   1 +
 llvm/lib/Analysis/CMakeLists.txt              |   1 +
 llvm/lib/Analysis/StaticDataProfileInfo.cpp   |  50 +++++
 llvm/lib/CodeGen/CMakeLists.txt               |   1 +
 llvm/lib/CodeGen/CodeGen.cpp                  |   1 +
 llvm/lib/CodeGen/StaticDataAnnotator.cpp      | 119 ++++++++++++
 llvm/lib/CodeGen/StaticDataSplitter.cpp       | 103 ++++-------
 llvm/lib/CodeGen/TargetPassConfig.cpp         |   7 +-
 .../CodeGen/X86/global-variable-partition.ll  | 173 +++++++++++-------
 12 files changed, 392 insertions(+), 143 deletions(-)
 create mode 100644 llvm/include/llvm/Analysis/StaticDataProfileInfo.h
 create mode 100644 llvm/lib/Analysis/StaticDataProfileInfo.cpp
 create mode 100644 llvm/lib/CodeGen/StaticDataAnnotator.cpp

diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
new file mode 100644
index 0000000000000..4220f7d820db9
--- /dev/null
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -0,0 +1,68 @@
+#ifndef LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+#define LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A class that holds the constants that represent static data and their
+/// profile information and provides methods to operate on them.
+class StaticDataProfileInfo {
+public:
+  /// Accumulate the profile count of a constant that will be lowered to static
+  /// data sections.
+  DenseMap<const Constant *, uint64_t> ConstantProfileCounts;
+
+  /// Keeps track of the constants that are seen at least once without profile
+  /// counts.
+  DenseSet<const Constant *> ConstantWithoutCounts;
+
+public:
+  StaticDataProfileInfo() = default;
+
+  /// If \p Count is not nullopt, add it to the profile count of the constant \p
+  /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if
+  /// the result exceeds it. Otherwise, mark the constant as having no profile
+  /// count.
+  void addConstantProfileCount(const Constant *C,
+                               std::optional<uint64_t> Count);
+
+  /// If \p C has a count, return it. Otherwise, return std::nullopt.
+  std::optional<uint64_t> getConstantProfileCount(const Constant *C) const;
+
+  /// Return true if the constant \p C is seen at least once without profiles.
+  bool hasUnknownCount(const Constant *C) const {
+    return ConstantWithoutCounts.count(C);
+  }
+};
+
+/// This wraps the StaticDataProfileInfo object as an immutable pass, for a
+/// backend pass to operate on.
+class StaticDataProfileInfoWrapperPass : public ImmutablePass {
+public:
+  static char ID;
+  StaticDataProfileInfoWrapperPass();
+  bool doInitialization(Module &M) override;
+  bool doFinalization(Module &M) override;
+
+  StaticDataProfileInfo &getStaticDataProfileInfo() { return *Info; }
+  const StaticDataProfileInfo &getStaticDataProfileInfo() const {
+    return *Info;
+  }
+
+  /// This pass provides StaticDataProfileInfo for reads/writes but does not
+  /// modify \p M or other analyses. All analyses are preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+private:
+  std::unique_ptr<StaticDataProfileInfo> Info;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index b5d2a7e6bf035..95e4de91b068c 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,10 +71,15 @@ namespace llvm {
   /// using profile information.
   MachineFunctionPass *createMachineFunctionSplitterPass();
 
-  /// createStaticDataSplitterPass - This pass partitions a static data section
-  /// into a hot and cold section using profile information.
+  /// createStaticDataSplitterPass - This is a machine-function pass that
+  /// categorizes static data hotness using profile information.
   MachineFunctionPass *createStaticDataSplitterPass();
 
+  /// createStaticDataAnnotatorPass - This is a module pass that reads from
+  /// StaticDataProfileInfoWrapperPass and annotates the section prefix of
+  /// global variables.
+  ModulePass *createStaticDataAnnotatorPass();
+
   /// MachineFunctionPrinter pass - This pass prints out the machine function to
   /// the given stream as a debugging tool.
   MachineFunctionPass *
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index b8df4d1ecab1d..96c240ab4965f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -205,6 +205,8 @@ void initializeMachineLoopInfoWrapperPassPass(PassRegistry &);
 void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
 void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry &);
 void initializeMachineOutlinerPass(PassRegistry &);
+void initializeStaticDataProfileInfoWrapperPassPass(PassRegistry &);
+void initializeStaticDataAnnotatorPass(PassRegistry &);
 void initializeMachinePipelinerPass(PassRegistry &);
 void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializeMachineRegionInfoPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index e6b4a4b0a56ae..c5080a324864d 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -207,6 +207,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 #define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME)
 #endif
 DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass)
+DUMMY_MACHINE_MODULE_PASS("static-data-annotator", StaticDataAnnotator)
 DUMMY_MACHINE_MODULE_PASS("pseudo-probe-inserter", PseudoProbeInserterPass)
 DUMMY_MACHINE_MODULE_PASS("mir-debugify", DebugifyMachineModule)
 DUMMY_MACHINE_MODULE_PASS("mir-check-debugify", CheckDebugMachineModulePass)
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index a44f6c6a135ef..fb2d7a82f670b 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -126,6 +126,7 @@ add_llvm_component_library(LLVMAnalysis
   ScalarEvolutionAliasAnalysis.cpp
   ScalarEvolutionDivision.cpp
   ScalarEvolutionNormalization.cpp
+  StaticDataProfileInfo.cpp
   StackLifetime.cpp
   StackSafetyAnalysis.cpp
   StructuralHash.cpp
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
new file mode 100644
index 0000000000000..b124e101f8cdf
--- /dev/null
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -0,0 +1,50 @@
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include <sys/types.h>
+
+using namespace llvm;
+void StaticDataProfileInfo::addConstantProfileCount(
+    const Constant *C, std::optional<uint64_t> Count) {
+  if (!Count) {
+    ConstantWithoutCounts.insert(C);
+    return;
+  }
+  uint64_t &OriginalCount = ConstantProfileCounts[C];
+  OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount);
+  // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few
+  // large values for special use.
+  if (OriginalCount > getInstrMaxCountValue())
+    OriginalCount = getInstrMaxCountValue();
+}
+
+std::optional<uint64_t>
+StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
+  auto I = ConstantProfileCounts.find(C);
+  if (I == ConstantProfileCounts.end())
+    return std::nullopt;
+  return I->second;
+}
+
+bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
+  Info.reset(new StaticDataProfileInfo());
+  return false;
+}
+
+bool StaticDataProfileInfoWrapperPass::doFinalization(Module &M) {
+  Info.reset();
+  return false;
+}
+
+INITIALIZE_PASS(StaticDataProfileInfoWrapperPass, "static-data-profile-info",
+                "Static Data Profile Info", false, true)
+
+StaticDataProfileInfoWrapperPass::StaticDataProfileInfoWrapperPass()
+    : ImmutablePass(ID) {
+  initializeStaticDataProfileInfoWrapperPassPass(
+      *PassRegistry::getPassRegistry());
+}
+
+char StaticDataProfileInfoWrapperPass::ID = 0;
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 88f863d8204d0..0680e01223e2b 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -227,6 +227,7 @@ add_llvm_component_library(LLVMCodeGen
   StackProtector.cpp
   StackSlotColoring.cpp
   StaticDataSplitter.cpp
+  StaticDataAnnotator.cpp
   SwiftErrorValueTracking.cpp
   SwitchLoweringUtils.cpp
   TailDuplication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 35df2a479a545..7d37e1ce5ce8b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -132,6 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeStackProtectorPass(Registry);
   initializeStackSlotColoringLegacyPass(Registry);
   initializeStaticDataSplitterPass(Registry);
+  initializeStaticDataAnnotatorPass(Registry);
   initializeStripDebugMachineModulePass(Registry);
   initializeTailDuplicateLegacyPass(Registry);
   initializeTargetPassConfigPass(Registry);
diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
new file mode 100644
index 0000000000000..04d918585f8af
--- /dev/null
+++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
@@ -0,0 +1,119 @@
+//===- StaticDataAnnotator - Annotate static data's section prefix --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// To reason about data hotness at module granularity, this file implements a
+// module pass, StaticDataAnnotator, that works in coordination with the
+// StaticDataSplitter pass.
+//
+// The StaticDataSplitter pass is a machine function pass. It analyzes data
+// hotness based on code and adds counters in the StaticDataProfileInfo.
+// The StaticDataAnnotator pass is a module pass. It iterates global variables
+// in the module, looks up counters from StaticDataProfileInfo and sets the
+// section prefix based on profiles.
+//
+// The three-pass structure is implemented for practical reasons, to work around
+// the limitation that a module pass based on legacy pass manager cannot make
+// use of MachineBlockFrequencyInfo analysis. In the future, we can consider
+// porting the StaticDataSplitter pass to a module-pass using the new pass
+// manager framework. That way, analyses are lazily computed as opposed to
+// eagerly scheduled, and a module pass can use MachineBlockFrequencyInfo.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "static-data-annotator"
+
+using namespace llvm;
+
+class StaticDataAnnotator : public ModulePass {
+public:
+  static char ID;
+
+  StaticDataProfileInfo *SDPI = nullptr;
+  const ProfileSummaryInfo *PSI = nullptr;
+
+  StaticDataAnnotator() : ModulePass(ID) {
+    initializeStaticDataAnnotatorPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<StaticDataProfileInfoWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.setPreservesAll();
+    ModulePass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "Static Data Annotator"; }
+
+  bool runOnModule(Module &M) override;
+};
+
+// Returns true if the global variable already has a section prefix that is the
+// same as `Prefix`.
+static bool alreadyHasSectionPrefix(const GlobalVariable &GV,
+                                    StringRef Prefix) {
+  std::optional<StringRef> SectionPrefix = GV.getSectionPrefix();
+  return SectionPrefix && (*SectionPrefix == Prefix);
+}
+
+bool StaticDataAnnotator::runOnModule(Module &M) {
+  SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+              .getStaticDataProfileInfo();
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  if (!PSI->hasProfileSummary())
+    return false;
+
+  bool Changed = false;
+  for (auto &GV : M.globals()) {
+    if (GV.isDeclarationForLinker())
+      continue;
+
+    // Skip global variables without profile counts. The module may not be
+    // profiled or instrumented.
+    auto Count = SDPI->getConstantProfileCount(&GV);
+    if (!Count)
+      continue;
+
+    if (PSI->isHotCount(*Count) && !alreadyHasSectionPrefix(GV, "hot")) {
+      // The variable counter is hot, set 'hot' section prefix if the section
+      // prefix isn't hot already.
+      GV.setSectionPrefix("hot");
+      Changed = true;
+    } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(&GV) &&
+               !alreadyHasSectionPrefix(GV, "unlikely")) {
+      // The variable counter is cold, set 'unlikely' section prefix when
+      // 1) the section prefix isn't unlikely already, and
+      // 2) the variable is not seen without profile counts. The reason is that
+      // a variable without profile counts doesn't have all its uses profiled,
+      // for example when a function is not instrumented, or not sampled (new
+      // code paths).
+      GV.setSectionPrefix("unlikely");
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+char StaticDataAnnotator::ID = 0;
+
+INITIALIZE_PASS(StaticDataAnnotator, DEBUG_TYPE, "Static Data Annotator", false,
+                false)
+
+ModulePass *llvm::createStaticDataAnnotatorPass() {
+  return new StaticDataAnnotator();
+}
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index aaf898e9b59c6..c647c3075d79c 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -30,9 +31,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
 using namespace llvm;
@@ -49,6 +48,7 @@ class StaticDataSplitter : public MachineFunctionPass {
   const MachineBranchProbabilityInfo *MBPI = nullptr;
   const MachineBlockFrequencyInfo *MBFI = nullptr;
   const ProfileSummaryInfo *PSI = nullptr;
+  StaticDataProfileInfo *SDPI = nullptr;
 
   // If the global value is a local linkage global variable, return it.
   // Otherwise, return nullptr.
@@ -58,19 +58,16 @@ class StaticDataSplitter : public MachineFunctionPass {
   // .data.rel.ro} sections.
   bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
 
-  // Iterate all global variables in the module and update the section prefix
-  // of the module-internal data.
-  bool updateGlobalVariableSectionPrefix(MachineFunction &MF);
+  // Use profiles to partition static data.
+  bool partitionStaticDataWithProfiles(MachineFunction &MF);
 
-  // Accummulated data profile count across machine functions in the module.
-  DenseMap<const GlobalVariable *, uint64_t> DataProfileCounts;
-  // Update LLVM statistics for a machine function without profiles.
-  void updateStatsWithoutProfiles(const MachineFunction &MF);
   // Update LLVM statistics for a machine function with profiles.
   void updateStatsWithProfiles(const MachineFunction &MF);
 
-  // Use profiles to partition static data.
-  bool partitionStaticDataWithProfiles(MachineFunction &MF);
+  // Update LLVM statistics for a machine function without profiles.
+  void updateStatsWithoutProfiles(const MachineFunction &MF);
+
+  void annotateStaticDataWithoutProfiles(const MachineFunction &MF);
 
 public:
   static char ID;
@@ -86,6 +83,7 @@ class StaticDataSplitter : public MachineFunctionPass {
     AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<StaticDataProfileInfoWrapperPass>();
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -96,18 +94,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
   PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
 
+  SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+              .getStaticDataProfileInfo();
+
   const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
                                 MF.getFunction().hasProfileData();
 
   if (!ProfileAvailable) {
+    annotateStaticDataWithoutProfiles(MF);
     updateStatsWithoutProfiles(MF);
     return false;
   }
 
   bool Changed = partitionStaticDataWithProfiles(MF);
 
-  Changed |= updateGlobalVariableSectionPrefix(MF);
-
   updateStatsWithProfiles(MF);
   return Changed;
 }
@@ -158,18 +158,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
           if (!GV || GV->getName().starts_with("llvm.") ||
               !inStaticDataSection(GV, TM))
             continue;
-
-          // Acccumulate data profile count across machine function
-          // instructions.
-          // TODO: Analyze global variable's initializers.
-          if (Count) {
-            uint64_t &GVCount = DataProfileCounts[GV];
-            GVCount = llvm::SaturatingAdd(GVCount, *Count);
-            // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few
-            // large values for special use.
-            if (GVCount > getInstrMaxCountValue())
-              GVCount = getInstrMaxCountValue();
-          }
+          SDPI->addConstantProfileCount(GV, Count);
         }
       }
     }
@@ -194,51 +183,6 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
          Kind.isBSS();
 }
 
-bool StaticDataSplitter::updateGlobalVariableSectionPrefix(
-    MachineFunction &MF) {
-  bool Changed = false;
-  for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
-    if (GV.isDeclarationForLinker())
-      continue;
-    // DataProfileCounts accumulates data profile count across all machine
-    // function instructions, and it can't model the indirect accesses through
-    // other global variables' initializers.
-    // TODO: Analyze the users of module-internal global variables and see
-    // through the users' initializers. Do not place a global variable into
-    // unlikely section if any of its users are potentially hot.
-    auto Iter = DataProfileCounts.find(&GV);
-    if (Iter == DataProfileCounts.end())
-      continue;
-
-    const std::optional<StringRef> Prefix = GV.getSectionPrefix();
-
-    // StaticDataSplitter is made a machine function pass rather than a module
-    // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function
-    // analysis pass and cannot be used for a legacy module pass.
-    // As a result, we use `DataProfileCounts` to accumulate data
-    // profile count across machine functions and update global variable section
-    // prefix once per machine function.
-    // FIXME: Make StaticDataSplitter a module pass under new pass manager
-    // framework, and set global variable section prefix once per module after
-    // analyzing all machine functions.
-    if (PSI->isColdCount(Iter->second)) {
-      assert((!Prefix || *Prefix != "hot") &&
-             "Count monotonically increased so a hot variable won't become "
-             "cold again.");
-      if (!Prefix || *Prefix != "unlikely") {
-        GV.setSectionPrefix("unlikely");
-        Changed |= true;
-      }
-    } else if (PSI->isHotCount(Iter->second)) {
-      if (!Prefix || *Prefix != "hot") {
-        GV.setSectionPrefix("hot");
-        Changed |= true;
-      }
-    }
-  }
-  return Changed;
-}
-
 void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
   if (!AreStatisticsEnabled())
     return;
@@ -257,6 +201,24 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
   }
 }
 
+void StaticDataSplitter::annotateStaticDataWithoutProfiles(
+    const MachineFunction &MF) {
+  for (const auto &MBB : MF) {
+    for (const MachineInstr &I : MBB) {
+      for (const MachineOperand &Op : I.operands()) {
+        if (!Op.isGlobal())
+          continue;
+        const GlobalVariable *GV =
+            getLocalLinkageGlobalVariable(Op.getGlobal());
+        if (!GV || GV->getName().starts_with("llvm.") ||
+            !inStaticDataSection(GV, MF.getTarget()))
+          continue;
+        SDPI->addConstantProfileCount(GV, std::nullopt);
+      }
+    }
+  }
+}
+
 void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {
   if (!AreStatisticsEnabled())
     return;
@@ -273,6 +235,7 @@ INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data",
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StaticDataProfileInfoWrapperPass)
 INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false,
                     false)
 
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 5d9da9df9092a..7f89043c0b20c 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1257,8 +1257,13 @@ void TargetPassConfig::addMachinePasses() {
       }
     }
     addPass(createMachineFunctionSplitterPass());
-    if (SplitStaticData || TM->Options.EnableStaticDataPartitioning)
+    if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) {
+      // The static data splitter pass is a machine function pass, and the
+      // static data annotator pass is a module-wide pass. See the file comment
+      // in StaticDataAnnotator.cpp for the motivation.
       addPass(createStaticDataSplitterPass());
+      addPass(createStaticDataAnnotatorPass());
+    }
   }
   // We run the BasicBlockSections pass if either we need BB sections or BB
   // address map (or both).
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index bb77f3362406b..b216047a5ea66 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -1,23 +1,35 @@
-
+; The static-data-splitter processes data from @cold_func first,
+; @unprofiled_func secondly, and @hot_func after the two functions above.
+; Tests that data hotness is based on aggregated module-wide profile
+; information. This way linker-mergeable data is emitted once per module.
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+; The three RUN commands set `-relocation-model=pic` so `hot_relro_array` and
+; `cold_relro_array` are placed in the .data.rel.ro-prefixed section.
+
+; This RUN command sets `-data-sections=true -unique-section-names=true` so data
+; sections are uniquified by variable names.
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
 ; RUN:     -partition-static-data-sections=true -data-sections=true \
 ; RUN:     -unique-section-names=true -relocation-model=pic \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA
 
+; This RUN command sets `-data-sections=true -unique-section-names=false` so
+; data sections are uniquified by numbers.
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
 ; RUN:     -partition-static-data-sections=true -data-sections=true \
 ; RUN:     -unique-section-names=false -relocation-model=pic \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA
 
+; This RUN command sets `-data-sections=false -unique-section-names=false`.
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
 ; RUN:     -partition-static-data-sections=true -data-sections=false \
 ; RUN:     -unique-section-names=false -relocation-model=pic \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA
 
+; For @.str and @.str.1
 ; SYM: .section .rodata.str1.1.hot.
 ; UNIQ: .section	.rodata.str1.1.hot.,"aMS", at progbits,1
 ; AGG: .section	.rodata.str1.1.hot
@@ -26,45 +38,66 @@ target triple = "x86_64-unknown-linux-gnu"
 ; DATA: .L.str.1
 ; DATA:    "%d\t%d\t%d\n"
 
+; For @hot_relro_array
+; SYM: .section	.data.rel.ro.hot.hot_relro_array
+; UNIQ: .section	.data.rel.ro.hot.,"aw", at progbits,unique,3
+; AGG: .section	.data.rel.ro.hot.,"aw", at progbits
 
-; SYM:  .section	.data.rel.ro.hot.hot_relro_array
+; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}.
 ; SYM: .section	.data.hot.hot_data,"aw", at progbits
-; SYM: .section	.bss.hot.hot_bss,"aw", at nobits
-
-; UNIQ: .section	.data.rel.ro.hot.,"aw", at progbits,unique,3
 ; UNIQ: .section	.data.hot.,"aw", at progbits,unique,4
-; UNIQ: .section	.bss.hot.,"aw", at nobits,unique,5
-
-; AGG: .section	.data.rel.ro.hot.,"aw", at progbits
 ; AGG: .section	.data.hot.,"aw", at progbits
-; AGG: .section .bss.hot.,"aw", at nobits
 
+; For @hot_bss, which is accessed by {unprofiled_func, hot_func}.
+; SYM: .section	.bss.hot.hot_bss,"aw", at nobits
+; UNIQ: .section	.bss.hot.,"aw", at nobits,unique,5
+; AGG: .section .bss.hot.,"aw", at nobits
 
+; For @.str.2
 ; SYM: .section	.rodata.str1.1.unlikely.,"aMS", at progbits,1
 ; UNIQ: section	.rodata.str1.1.unlikely.,"aMS", at progbits,1
 ; AGG: .section	.rodata.str1.1.unlikely.,"aMS", at progbits,1
 ; DATA: .L.str.2:
 ; DATA:    "cold%d\t%d\t%d\n"
 
-
+; For @cold_bss
 ; SYM: .section	.bss.unlikely.cold_bss,"aw", at nobits
-; SYM: .section	.data.unlikely.cold_data,"aw", at progbits
-; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw", at progbits
-; SYM: .section	.bss.unlikely.bss2,"aw", at nobits
-; SYM: .section	.data.unlikely.data3,"aw", at progbits
-
 ; UNIQ: .section	.bss.unlikely.,"aw", at nobits,unique,6
-; UNIQ: .section	.data.unlikely.,"aw", at progbits,unique,7
-; UNIQ: .section	.data.rel.ro.unlikely.,"aw", at progbits,unique,8
-; UNIQ: .section	.bss.unlikely.,"aw", at nobits,unique,9
-; UNIQ: .section	.data.unlikely.,"aw", at progbits,unique,10
-
 ; AGG: .section	.bss.unlikely.,"aw", at nobits
+
+; For @cold_data
+; SYM: .section	.data.unlikely.cold_data,"aw", at progbits
+; UNIQ: .section	.data.unlikely.,"aw", at progbits,unique,7
 ; AGG: .section	.data.unlikely.,"aw", at progbits
+
+; For @cold_relro_array
+; SYM: .section	.data.rel.ro.unlikely.cold_relro_array,"aw", at progbits
+; UNIQ: .section	.data.rel.ro.unlikely.,"aw", at progbits,unique,8
 ; AGG: .section	.data.rel.ro.unlikely.,"aw", at progbits
+
+; Currently static-data-splitter only analyzes access from code.
+; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; and @cold_relro_array. A follow-up item is to analyze indirect access via data
+; and prune the unlikely list.
+; For @bss2
+; SYM: .section	.bss.unlikely.bss2,"aw", at nobits
+; UNIQ: .section	.bss.unlikely.,"aw", at nobits,unique,9
 ; AGG: .section	.bss.unlikely.,"aw", at nobits
+
+; For @data3
+; SYM: .section	.data.unlikely.data3,"aw", at progbits
+; UNIQ: .section	.data.unlikely.,"aw", at progbits,unique,10
 ; AGG: .section	.data.unlikely.,"aw", at progbits
 
+; For @data_with_unknown_hotness
+; SYM: 	.type	.Ldata_with_unknown_hotness,@object          # @data_with_unknown_hotness
+; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits
+; UNIQ: .section  .data,"aw",@progbits,unique,11
+; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+; AGG: .data
+; DATA: .Ldata_with_unknown_hotness:
+
 @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@@ -74,29 +107,11 @@ target triple = "x86_64-unknown-linux-gnu"
 @cold_bss = internal global i32 0
 @cold_data = internal global i32 4
 @cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
-
-; COM: Currently static-data-splitter only analyzes access from code.
-; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
-; COM: and @cold_relro_array.
-; COM: A follow-up item is to analyze access from data and prune the unlikely
-; COM: list.
 @bss2 = internal global i32 0
 @data3 = internal global i32 3
+@data_with_unknown_hotness = private global i32 5
 
-define void @hot_callee(i32 %0) !prof !51 {
-  %2 = call i32 (ptr, ...) @printf(ptr @.str)
-  %3 = srem i32 %0, 2
-  %4 = sext i32 %3 to i64
-  %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
-  %6 = load ptr, ptr %5
-  %7 = load i32, ptr %6
-  %8 = load i32, ptr @hot_data
-  %9 = load i32, ptr @hot_bss
-  %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9)
-  ret void
-}
-
-define void @cold_callee(i32 %0) !prof !52 {
+define void @cold_func(i32 %0) !prof !15 {
   %2 = load i32, ptr @cold_bss
   %3 = load i32, ptr @cold_data
   %4 = srem i32 %0, 2
@@ -104,14 +119,34 @@ define void @cold_callee(i32 %0) !prof !52 {
   %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
   %7 = load ptr, ptr %6
   %8 = load i32, ptr %7
-  %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8)
+  %9 = load i32, ptr @data_with_unknown_hotness
+  %11 = load i32, ptr @hot_data
+  %12 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11)
+  ret void
+}
+
+define i32 @unprofiled_func() {
+  %a = load i32, ptr @data_with_unknown_hotness
+  %b = load i32, ptr @hot_data
+  %c = load i32, ptr @hot_bss
+  %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c)
+  ret i32 %ret
+}
+
+define void @hot_func(i32 %0) !prof !14 {
+  %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str)
+  %3 = srem i32 %0, 2
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+  %6 = load ptr, ptr %5
+  %7 = load i32, ptr %6
+  %8 = load i32, ptr @hot_data
+  %9 = load i32, ptr @hot_bss
+  %10 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9)
   ret void
 }
 
-define i32 @main(i32 %0, ptr %1) !prof !52 {
-  %3 = call i64 @time(ptr null)
-  %4 = trunc i64 %3 to i32
-  call void @srand(i32 %4)
+define i32 @main(i32 %0, ptr %1) !prof !15 {
   br label %11
 
 5:                                                ; preds = %11
@@ -120,7 +155,7 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
   store i32 %6, ptr @cold_data
   store i32 %6, ptr @bss2
   store i32 %6, ptr @data3
-  call void @cold_callee(i32 %6)
+  call void @cold_func(i32 %6)
   ret i32 0
 
 11:                                               ; preds = %11, %2
@@ -134,32 +169,30 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
   store i32 %13, ptr @hot_data
   %18 = add i32 %13, 1
   store i32 %18, ptr @hot_bss
-  call void @hot_callee(i32 %12)
+  call void @hot_func(i32 %12)
   %19 = add i32 %12, 1
   %20 = icmp eq i32 %19, 100000
-  br i1 %20, label %5, label %11, !prof !53
+  br i1 %20, label %5, label %11, !prof !16
 }
 
-declare void @srand(i32)
-declare i64 @time(ptr)
 declare i32 @rand()
-declare i32 @printf(ptr, ...)
-
-!llvm.module.flags = !{!12}
-
-!12 = !{i32 1, !"ProfileSummary", !13}
-!13 = !{!14, !15, !16, !17, !18, !19, !20, !23}
-!14 = !{!"ProfileFormat", !"InstrProf"}
-!15 = !{!"TotalCount", i64 1460183}
-!16 = !{!"MaxCount", i64 849024}
-!17 = !{!"MaxInternalCount", i64 32769}
-!18 = !{!"MaxFunctionCount", i64 849024}
-!19 = !{!"NumCounts", i64 23627}
-!20 = !{!"NumFunctions", i64 3271}
-!23 = !{!"DetailedSummary", !24}
-!24 = !{!36, !40}
-!36 = !{i32 990000, i64 166, i32 73}
-!40 = !{i32 999999, i64 1, i32 1443}
-!51 = !{!"function_entry_count", i64 100000}
-!52 = !{!"function_entry_count", i64 1}
-!53 = !{!"branch_weights", i32 1, i32 99999}
+declare i32 @func_taking_arbitrary_param(...)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460183}
+!5 = !{!"MaxCount", i64 849024}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849024}
+!8 = !{!"NumCounts", i64 23627}
+!9 = !{!"NumFunctions", i64 3271}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 166, i32 73}
+!13 = !{i32 999999, i64 3, i32 1443}
+!14 = !{!"function_entry_count", i64 100000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 99999}

>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 28 Feb 2025 14:41:56 -0800
Subject: [PATCH 07/12] [CodeGen][StaticDataSplitter]Support constant pool
 partitioning

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |   8 +
 .../CodeGen/TargetLoweringObjectFileImpl.h    |   6 +
 .../llvm/Target/TargetLoweringObjectFile.h    |   7 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  22 ++-
 llvm/lib/CodeGen/StaticDataSplitter.cpp       |  56 +++++--
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  35 +++++
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  10 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  10 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp         |  10 ++
 .../AArch64/constant-pool-partition.ll        | 141 ++++++++++++++++++
 .../CodeGen/X86/constant-pool-partition.ll    | 131 ++++++++++++++++
 11 files changed, 422 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll
 create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Provides the profile information for constants.
+  const StaticDataProfileInfo *SDPI = nullptr;
+
+  /// The profile summary information.
+  const ProfileSummaryInfo *PSI = nullptr;
+
   /// Map a basic block section ID to the begin and end symbols of that section
   ///  which determine the section's range.
   struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
                                    const Constant *C,
                                    Align &Alignment) const override;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+                                   const Constant *C, Align &Alignment,
+                                   StringRef SectionSuffix) const override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
                                       const TargetMachine &TM) const override;
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
                                            SectionKind Kind, const Constant *C,
                                            Align &Alignment) const;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  virtual MCSection *getSectionForConstant(const DataLayout &DL,
+                                           SectionKind Kind, const Constant *C,
+                                           Align &Alignment,
+                                           StringRef SectionSuffix) const;
+
   virtual MCSection *
   getSectionForMachineBasicBlock(const Function &F,
                                  const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
     if (!CPE.isMachineConstantPoolEntry())
       C = CPE.Val.ConstVal;
 
-    MCSection *S = getObjFileLowering().getSectionForConstant(
-        getDataLayout(), Kind, C, Alignment);
+    MCSection *S = nullptr;
+    if (TM.Options.EnableStaticDataPartitioning) {
+      SmallString<8> SectionNameSuffix;
+      if (C && SDPI && PSI) {
+        auto Count = SDPI->getConstantProfileCount(C);
+        if (Count) {
+          if (PSI->isHotCount(*Count)) {
+            SectionNameSuffix.append("hot");
+          } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
+            SectionNameSuffix.append("unlikely");
+          }
+        }
+      }
+
+      S = getObjFileLowering().getSectionForConstant(
+          getDataLayout(), Kind, C, Alignment, SectionNameSuffix);
+    } else {
+      S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C,
+                                                     Alignment);
+    }
 
     // The number of sections are small, just do a linear search from the
     // last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index c647c3075d79c..4768c0829ea49 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -10,7 +10,7 @@
 // for the following types of static data:
 // - Jump tables
 // - Module-internal global variables
-// - Constant pools (TODO)
+// - Constant pools
 //
 // For the original RFC of this pass please see
 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
@@ -117,16 +117,17 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
   const TargetMachine &TM = MF.getTarget();
   MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+  const MachineConstantPool *MCP = MF.getConstantPool();
 
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
   // `MachineOperand::isJTI()` to identify jump table operands.
-  // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
-  // in the same loop.
+  // Similarly, `MachineOperand::isCPI()` is used to identify constant pool
+  // usages in the same loop.
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isJTI() && !Op.isGlobal())
+        if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI())
           continue;
 
         std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
@@ -148,7 +149,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
-        } else {
+        } else if (Op.isGlobal()) {
           // Find global variables with local linkage.
           const GlobalVariable *GV =
               getLocalLinkageGlobalVariable(Op.getGlobal());
@@ -159,6 +160,20 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
               !inStaticDataSection(GV, TM))
             continue;
           SDPI->addConstantProfileCount(GV, Count);
+        } else {
+          assert(Op.isCPI() && "Op must be constant pool index in this branch");
+          int CPI = Op.getIndex();
+          if (CPI == -1)
+            continue;
+
+          assert(MCP != nullptr && "Constant pool info is not available.");
+          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+          if (CPE.isMachineConstantPoolEntry())
+            continue;
+
+          const Constant *C = CPE.Val.ConstVal;
+          SDPI->addConstantProfileCount(C, Count);
         }
       }
     }
@@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
     const MachineFunction &MF) {
+  const MachineConstantPool *MCP = MF.getConstantPool();
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isGlobal())
-          continue;
-        const GlobalVariable *GV =
-            getLocalLinkageGlobalVariable(Op.getGlobal());
-        if (!GV || GV->getName().starts_with("llvm.") ||
-            !inStaticDataSection(GV, MF.getTarget()))
+        if (!Op.isGlobal() && !Op.isCPI())
           continue;
-        SDPI->addConstantProfileCount(GV, std::nullopt);
+        if (Op.isGlobal()) {
+          const GlobalVariable *GV =
+              getLocalLinkageGlobalVariable(Op.getGlobal());
+          if (!GV || GV->getName().starts_with("llvm.") ||
+              !inStaticDataSection(GV, MF.getTarget()))
+            continue;
+          SDPI->addConstantProfileCount(GV, std::nullopt);
+        } else {
+          assert(Op.isCPI() && "Op must be constant pool index in this branch");
+          int CPI = Op.getIndex();
+          if (CPI == -1)
+            continue;
+
+          assert(MCP != nullptr && "Constant pool info is not available.");
+          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+          if (CPE.isMachineConstantPoolEntry())
+            continue;
+
+          const Constant *C = CPE.Val.ConstVal;
+          SDPI->addConstantProfileCount(C, std::nullopt);
+        }
       }
     }
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index be2f5fb0b4a79..6cf8a0e9d211f 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
   return DataRelROSection;
 }
 
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+    const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+    StringRef SectionSuffix) const {
+  // An empty suffix selects the default section. TODO: Share code between
+  // this function and MCObjectInfo::initELFMCObjectFileInfo.
+  if (SectionSuffix.empty())
+    return getSectionForConstant(DL, Kind, C, Alignment);
+  // Every name below is "<base section>.<suffix>", e.g. ".rodata.cst8.hot".
+  auto &Context = getContext();
+  if (Kind.isMergeableConst4() && MergeableConst4Section)
+    return Context.getELFSection(".rodata.cst4." + SectionSuffix,
+                                 ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
+  if (Kind.isMergeableConst8() && MergeableConst8Section)
+    return Context.getELFSection(".rodata.cst8." + SectionSuffix,
+                                 ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
+  if (Kind.isMergeableConst16() && MergeableConst16Section)
+    return Context.getELFSection(".rodata.cst16." + SectionSuffix,
+                                 ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
+  if (Kind.isMergeableConst32() && MergeableConst32Section)
+    return Context.getELFSection(".rodata.cst32." + SectionSuffix,
+                                 ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
+  if (Kind.isReadOnly())
+    return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC);
+  // Keep the '.' separator so the result is not e.g. ".data.rel.rohot".
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return Context.getELFSection(".data.rel.ro." + SectionSuffix,
+                               ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE);
+}
+
 /// Returns a unique section for the given machine basic block.
 MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
     const Function &F, const MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index fc38bfe93c1e0..74a78457e42ec 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -226,6 +226,16 @@ class AArch64AsmPrinter : public AsmPrinter {
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
+    auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+    if (PSIW) {
+      PSI = &PSIW->getPSI();
+    }
+
+    auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+    if (SDPIW) {
+      SDPI = &SDPIW->getStaticDataProfileInfo();
+    }
+
     AArch64FI = MF.getInfo<AArch64FunctionInfo>();
     STI = &MF.getSubtarget<AArch64Subtarget>();
 
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 02c101055d9f3..07f5532bee17e 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
   return DataSection;
 }
 
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+    const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+    StringRef SectionSuffix) const {
+  // This base implementation cannot build suffixed section names, so a
+  // non-empty suffix is fatal; an empty one means "use the plain overload".
+  if (!SectionSuffix.empty())
+    report_fatal_error("Unimplemented");
+  return getSectionForConstant(DL, Kind, C, Alignment);
+}
+
 MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock(
     const Function &F, const MachineBasicBlock &MBB,
     const TargetMachine &TM) const {
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 79aa898e18bfa..f58974e79efb9 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -61,6 +62,15 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
 /// runOnMachineFunction - Emit the function body.
 ///
 bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+  if (PSIW) {
+    PSI = &PSIW->getPSI();
+  }
+
+  auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+  if (SDPIW) {
+    SDPI = &SDPIW->getStaticDataProfileInfo();
+  }
   Subtarget = &MF.getSubtarget<X86Subtarget>();
 
   SMShadowTracker.startFunction(MF);
diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
new file mode 100644
index 0000000000000..5d2df59d34317
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+;   constant (e.g., from an unprofiled function, or cold function) should have
+;   `.hot` suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+;   function, constant pools for this constant should not have `.unlikely` suffix.
+
+; CHECK:     .section	.rodata.cst8.hot,"aM", at progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:	   .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK:     .section	.rodata.cst8.unlikely,"aM", at progbits,8
+; CHECK: .LCPI0_1:
+; CHECK:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
+; CHECK:	   .section	.rodata.cst8,"aM", at progbits,8
+; CHECK: .LCPI0_2:
+; CHECK:     .byte   0                               // 0x0
+; CHECK:     .byte   4                               // 0x4
+; CHECK:     .byte   8                               // 0x8
+; CHECK:     .byte   12                              // 0xc
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+
+; CHECK:	   .section	.rodata.cst8,"aM", at progbits,8
+; CHECK: .LCPI1_0:
+; CHECK:     .byte   0                               // 0x0
+; CHECK:     .byte   4                               // 0x4
+; CHECK:     .byte   8                               // 0x8
+; CHECK:     .byte   12                              // 0xc
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:      .section        .rodata.cst16.hot,"aM", at progbits,16
+; CHECK: .LCPI1_1:
+; CHECK:      .word   442                             // 0x1ba
+; CHECK:      .word   100                             // 0x64
+; CHECK:      .word   0                               // 0x0
+; CHECK:      .word   0                               // 0x0
+
+; CHECK:      .section        .rodata.cst8.hot,"aM", at progbits,8
+; CHECK: .LCPI2_0:
+; CHECK:      .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK:      .section        .rodata.cst16.hot,"aM", at progbits,16
+; CHECK: .LCPI2_1:
+; CHECK:      .word   442                             // 0x1ba
+; CHECK:      .word   100                             // 0x64
+; CHECK:      .word   0                               // 0x0
+; CHECK:      .word   0                               // 0x0
+
+; CHECK:    .section	.rodata.cst32,"aM", at progbits,32
+; CHECK:    .globl	val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+  %3 = extractelement <2 x i32> %t2, i32 1
+  %sum = add i32 %2, %3
+  %ret = add i32 %sum, %num
+  ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+  %t3 = zext <4 x i16> %t2 to <4 x i32>
+  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+  br label %7
+
+5:                                                ; preds = %7
+  %x = call double @double_func()
+  %a = call <16 x i8> @vector_func_16i8()
+  %b = call <16 x i8> @vector_func_16i8()
+  call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
+  ret i32 0
+
+7:                                                ; preds = %7, %2
+  %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+  %9 = call i32 @rand()
+  call void @hot_func(i32 %9)
+  %10 = add i32 %8, 1
+  %11 = icmp eq i32 %10, 100000
+  br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @rand()
+declare double @double_func()
+declare <4 x i32> @vector_func()
+declare <16 x i8> @vector_func_16i8()
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 3, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
new file mode 100644
index 0000000000000..e39a5d2026dd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -0,0 +1,131 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from @cold_func first, then the two
+; functions without profiles, and finally @hot_func. Specifically, tests that
+; 1. If a constant is accessed by hot functions, all constant pools for this
+;    constant (e.g., from an unprofiled function, or cold function) should have
+;    .hot suffix.
+; 2. Similarly if a constant is accessed by both cold function and un-profiled
+;    function, constant pools for this constant should not have .unlikely suffix.
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN:     -unique-section-names=true \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=false -data-sections=false \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; CHECK:     .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:	   .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK: 	   .section	.rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK:	   .quad	0x3eb0000000000000              # double 9.5367431640625E-7
+
+; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK:     .quad  0x3fc0000000000000              # double 0.125
+
+; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK:     .quad   0x3fc0000000000000              # double 0.125
+
+; CHECK:     .section        .rodata.cst4,"aM",@progbits,4
+; CHECK: .LCPI2_0:
+; CHECK:     .long   0x3e000000              # float 0.125
+
+; CHECK:	   .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI3_0:
+; CHECK:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK:     .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI3_1:
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK: .LCPI3_2:
+; CHECK:     .long   2147484090                      # 0x800001ba
+; CHECK:     .long   2147483748                      # 0x80000064
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+
+; CHECK:    .section	.rodata.cst32,"aM",@progbits,32
+; CHECK:    .globl	val
+
+define double @cold_func(double %x) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %y = fmul double %x, 0x3EB0000000000000
+  %z = fmul double %y, 0x3fc0000000000000
+  ret double %z
+}
+
+define double @unprofiled_func_double(double %x) {
+  %z = fmul double %x, 0x3fc0000000000000
+  ret double %z
+}
+
+define float @unprofiled_func_float(float %x) {
+  %z = fmul float %x, 0x3fc0000000000000
+  ret float %z
+}
+
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+  br label %7
+
+5:                                                ; preds = %7
+  %x = call double @double_func()
+  call void @cold_func(double %x)
+  ret i32 0
+
+7:                                                ; preds = %7, %2
+  %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+  %9 = call i32 @rand()
+  call void @hot_func(i32 %9)
+  %10 = add i32 %8, 1
+  %11 = icmp eq i32 %10, 100000
+  br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @rand()
+declare double @double_func()
+declare i32 @func_taking_arbitrary_param(...)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 1, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}

>From 9fae47c06f8d559bd90ddec9be6b0cd34131bbd6 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Mar 2025 16:15:15 -0800
Subject: [PATCH 08/12] resolve comments

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  4 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 39 ++++----
 llvm/lib/CodeGen/StaticDataSplitter.cpp       | 92 +++++++++----------
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  | 16 ++--
 4 files changed, 73 insertions(+), 78 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 2018f411be796..bd0f5ada805ab 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -338,6 +338,10 @@ class AsmPrinter : public MachineFunctionPass {
     DwarfUsesRelocationsAcrossSections = Enable;
   }
 
+  // Returns a section suffix (hot or unlikely) for the constant if profiles
+  // are available. Returns empty string otherwise.
+  StringRef getConstantSectionSuffix(const Constant *C) const;
+
   //===------------------------------------------------------------------===//
   // XRay instrumentation implementation.
   //===------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 60018afe2f8a7..bec3e718bd11b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2769,6 +2769,23 @@ namespace {
 
 } // end anonymous namespace
 
+StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {
+  // Return string literals only: a StringRef into a function-local
+  // SmallString would dangle as soon as this function returns.
+  if (!TM.Options.EnableStaticDataPartitioning || !C || !SDPI || !PSI)
+    return "";
+
+  auto Count = SDPI->getConstantProfileCount(C);
+  if (!Count)
+    return "";
+  if (PSI->isHotCount(*Count))
+    return "hot";
+  // A cold count only yields "unlikely" when no use has an unknown count.
+  if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C))
+    return "unlikely";
+  return "";
+}
+
 /// EmitConstantPool - Print to the current output stream assembly
 /// representations of the constants in the constant pool MCP. This is
 /// used to print out constants which have been "spilled to memory" by
@@ -2791,26 +2808,8 @@ void AsmPrinter::emitConstantPool() {
     if (!CPE.isMachineConstantPoolEntry())
       C = CPE.Val.ConstVal;
 
-    MCSection *S = nullptr;
-    if (TM.Options.EnableStaticDataPartitioning) {
-      SmallString<8> SectionNameSuffix;
-      if (C && SDPI && PSI) {
-        auto Count = SDPI->getConstantProfileCount(C);
-        if (Count) {
-          if (PSI->isHotCount(*Count)) {
-            SectionNameSuffix.append("hot");
-          } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
-            SectionNameSuffix.append("unlikely");
-          }
-        }
-      }
-
-      S = getObjFileLowering().getSectionForConstant(
-          getDataLayout(), Kind, C, Alignment, SectionNameSuffix);
-    } else {
-      S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C,
-                                                     Alignment);
-    }
+    MCSection *S = getObjFileLowering().getSectionForConstant(
+        getDataLayout(), Kind, C, Alignment, getConstantSectionSuffix(C));
 
     // The number of sections are small, just do a linear search from the
     // last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 4768c0829ea49..df5ae7c2e8369 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -69,6 +69,11 @@ class StaticDataSplitter : public MachineFunctionPass {
 
   void annotateStaticDataWithoutProfiles(const MachineFunction &MF);
 
+  // Returns the constant if the operand refers to a global variable or constant
+  // that gets lowered to static data sections. Otherwise, return nullptr.
+  const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
+                              const MachineConstantPool *MCP);
+
 public:
   static char ID;
 
@@ -112,12 +117,42 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+const Constant *
+StaticDataSplitter::getConstant(const MachineOperand &Op,
+                                const TargetMachine &TM,
+                                const MachineConstantPool *MCP) {
+  // Global operand: only a local-linkage variable that is in a static data
+  // section qualifies. 'llvm.'-prefixed variables are skipped conservatively
+  // because they are often handled specially.
+  if (Op.isGlobal()) {
+    const GlobalVariable *Var = getLocalLinkageGlobalVariable(Op.getGlobal());
+    const bool Eligible = Var && !Var->getName().starts_with("llvm.") &&
+                          inStaticDataSection(Var, TM);
+    return Eligible ? Var : nullptr;
+  }
+
+  // Anything that is neither a global nor a constant pool index is ignored.
+  if (!Op.isCPI())
+    return nullptr;
+
+  // An index of -1 is rejected before it can be used to subscript the pool.
+  const int Index = Op.getIndex();
+  if (Index == -1)
+    return nullptr;
+
+  assert(MCP != nullptr && "Constant pool info is not available.");
+  const MachineConstantPoolEntry &Entry = MCP->getConstants()[Index];
+  // Machine constant pool entries are skipped; only entries holding a plain
+  // constant are returned.
+  if (Entry.isMachineConstantPoolEntry())
+    return nullptr;
+  return Entry.Val.ConstVal;
+}
+
 bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
-  const TargetMachine &TM = MF.getTarget();
   MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  const MachineConstantPool *MCP = MF.getConstantPool();
 
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
@@ -149,30 +184,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
-        } else if (Op.isGlobal()) {
-          // Find global variables with local linkage.
-          const GlobalVariable *GV =
-              getLocalLinkageGlobalVariable(Op.getGlobal());
-          // Skip 'special' global variables conservatively because they are
-          // often handled specially, and skip those not in static data
-          // sections.
-          if (!GV || GV->getName().starts_with("llvm.") ||
-              !inStaticDataSection(GV, TM))
-            continue;
-          SDPI->addConstantProfileCount(GV, Count);
-        } else {
-          assert(Op.isCPI() && "Op must be constant pool index in this branch");
-          int CPI = Op.getIndex();
-          if (CPI == -1)
-            continue;
-
-          assert(MCP != nullptr && "Constant pool info is not available.");
-          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
-
-          if (CPE.isMachineConstantPoolEntry())
-            continue;
-
-          const Constant *C = CPE.Val.ConstVal;
+        } else if (const Constant *C =
+                       getConstant(Op, MF.getTarget(), MF.getConstantPool())) {
           SDPI->addConstantProfileCount(C, Count);
         }
       }
@@ -218,34 +231,13 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
     const MachineFunction &MF) {
-  const MachineConstantPool *MCP = MF.getConstantPool();
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isGlobal() && !Op.isCPI())
-          continue;
-        if (Op.isGlobal()) {
-          const GlobalVariable *GV =
-              getLocalLinkageGlobalVariable(Op.getGlobal());
-          if (!GV || GV->getName().starts_with("llvm.") ||
-              !inStaticDataSection(GV, MF.getTarget()))
-            continue;
-          SDPI->addConstantProfileCount(GV, std::nullopt);
-        } else {
-          assert(Op.isCPI() && "Op must be constant pool index in this branch");
-          int CPI = Op.getIndex();
-          if (CPI == -1)
-            continue;
-
-          assert(MCP != nullptr && "Constant pool info is not available.");
-          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
-
-          if (CPE.isMachineConstantPoolEntry())
-            continue;
-
-          const Constant *C = CPE.Val.ConstVal;
+        const Constant *C =
+            getConstant(Op, MF.getTarget(), MF.getConstantPool());
+        if (C)
           SDPI->addConstantProfileCount(C, std::nullopt);
-        }
       }
     }
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6cf8a0e9d211f..ad9c7f099df56 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1074,35 +1074,35 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
 
 MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
     const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
-    StringRef SectionPrefix) const {
+    StringRef SectionSuffix) const {
   // TODO: Share code between this function and
   // MCObjectInfo::initELFMCObjectFileInfo.
-  if (SectionPrefix.empty())
+  if (SectionSuffix.empty())
     return getSectionForConstant(DL, Kind, C, Alignment);
 
   auto &Context = getContext();
   if (Kind.isMergeableConst4() && MergeableConst4Section)
-    return Context.getELFSection(".rodata.cst4." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst4." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
   if (Kind.isMergeableConst8() && MergeableConst8Section)
-    return Context.getELFSection(".rodata.cst8." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst8." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
   if (Kind.isMergeableConst16() && MergeableConst16Section)
-    return Context.getELFSection(".rodata.cst16." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst16." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
   if (Kind.isMergeableConst32() && MergeableConst32Section)
-    return Context.getELFSection(".rodata.cst32." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst32." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
   if (Kind.isReadOnly())
-    return Context.getELFSection(".rodata" + SectionPrefix, ELF::SHT_PROGBITS,
+    return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC);
 
   assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
-  return Context.getELFSection(".data.rel.ro" + SectionPrefix,
+  return Context.getELFSection(".data.rel.ro." + SectionSuffix,
                                ELF::SHT_PROGBITS,
                                ELF::SHF_ALLOC | ELF::SHF_WRITE);
 }

>From 9302b2b708cdce89add071fa584c12ddb4f9d71d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 20 Mar 2025 13:57:37 -0700
Subject: [PATCH 09/12] port code de-duplication based on feedback in the
 follow up patch (https://github.com/llvm/llvm-project/pull/129781)

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 53 +++++++++++++------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index c647c3075d79c..77778556ce44e 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -58,6 +58,11 @@ class StaticDataSplitter : public MachineFunctionPass {
   // .data.rel.ro} sections.
   bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
 
+  // Returns the constant if the operand refers to a global variable or constant
+  // that gets lowered to static data sections. Otherwise, return nullptr.
+  const Constant *getConstant(const MachineOperand &Op,
+                              const TargetMachine &TM);
+
   // Use profiles to partition static data.
   bool partitionStaticDataWithProfiles(MachineFunction &MF);
 
@@ -84,6 +89,8 @@ class StaticDataSplitter : public MachineFunctionPass {
     AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<StaticDataProfileInfoWrapperPass>();
+    // This pass does not modify the CFG.
+    AU.setPreservesCFG();
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -112,6 +119,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op,
+                                                const TargetMachine &TM) {
+  // Only operands that reference a global value are of interest.
+  if (!Op.isGlobal())
+    return nullptr;
+
+  // Keep local-linkage variables placed in static data sections; names with
+  // the 'llvm.' prefix are skipped since they are often handled specially.
+  const GlobalVariable *Var = getLocalLinkageGlobalVariable(Op.getGlobal());
+  const bool Eligible = Var && !Var->getName().starts_with("llvm.") &&
+                        inStaticDataSection(Var, TM);
+  return Eligible ? Var : nullptr;
+}
+
 bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
@@ -148,17 +169,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
-        } else {
-          // Find global variables with local linkage.
-          const GlobalVariable *GV =
-              getLocalLinkageGlobalVariable(Op.getGlobal());
-          // Skip 'special' global variables conservatively because they are
-          // often handled specially, and skip those not in static data
-          // sections.
-          if (!GV || GV->getName().starts_with("llvm.") ||
-              !inStaticDataSection(GV, TM))
-            continue;
-          SDPI->addConstantProfileCount(GV, Count);
+        } else if (const Constant *C = getConstant(Op, TM)) {
+          SDPI->addConstantProfileCount(C, Count);
         }
       }
     }
@@ -203,20 +215,11 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
     const MachineFunction &MF) {
-  for (const auto &MBB : MF) {
-    for (const MachineInstr &I : MBB) {
-      for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isGlobal())
-          continue;
-        const GlobalVariable *GV =
-            getLocalLinkageGlobalVariable(Op.getGlobal());
-        if (!GV || GV->getName().starts_with("llvm.") ||
-            !inStaticDataSection(GV, MF.getTarget()))
-          continue;
-        SDPI->addConstantProfileCount(GV, std::nullopt);
-      }
-    }
-  }
+  for (const auto &MBB : MF)
+    for (const MachineInstr &I : MBB)
+      for (const MachineOperand &Op : I.operands())
+        if (const Constant *C = getConstant(Op, MF.getTarget()))
+          SDPI->addConstantProfileCount(C, std::nullopt);
 }
 
 void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {

>From 4f91e5c74afbe35efface1031ad8ae75c7fabe1e Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 26 Mar 2025 11:25:19 -0700
Subject: [PATCH 10/12] resolve comments

---
 .../AArch64/constant-pool-partition.ll        | 133 +++++++++++-------
 .../CodeGen/X86/constant-pool-partition.ll    |  88 +++++++-----
 2 files changed, 131 insertions(+), 90 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
index 5d2df59d34317..74b3632f39a7e 100644
--- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -14,56 +14,84 @@
 ; secondly, and then hot_func. Specifically, tests that
 ; - If a constant is accessed by hot functions, all constant pools for this
 ;   constant (e.g., from an unprofiled function, or cold function) should have
-;   `.hot` suffix.
+;   `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
+;   @hot_func, so two CPI emits (under label LCPI0_0 and LCPI2_0) have `.hot`
+;   suffix.
 ; - Similarly if a constant is accessed by both cold function and un-profiled
 ;   function, constant pools for this constant should not have `.unlikely` suffix.
 
-; CHECK:     .section	.rodata.cst8.hot,"aM",@progbits,8
-; CHECK: .LCPI0_0:
-; CHECK:	   .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
-; CHECK:     .section	.rodata.cst8.unlikely,"aM",@progbits,8
-; CHECK: .LCPI0_1:
-; CHECK:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
-; CHECK:	   .section	.rodata.cst8,"aM",@progbits,8
-; CHECK: .LCPI0_2:
-; CHECK:     .byte   0                               // 0x0
-; CHECK:     .byte   4                               // 0x4
-; CHECK:     .byte   8                               // 0x8
-; CHECK:     .byte   12                              // 0xc
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-
-; CHECK:	   .section	.rodata.cst8,"aM",@progbits,8
-; CHECK: .LCPI1_0:
-; CHECK:     .byte   0                               // 0x0
-; CHECK:     .byte   4                               // 0x4
-; CHECK:     .byte   8                               // 0x8
-; CHECK:     .byte   12                              // 0xc
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:      .section        .rodata.cst16.hot,"aM",@progbits,16
-; CHECK: .LCPI1_1:
-; CHECK:      .word   442                             // 0x1ba
-; CHECK:      .word   100                             // 0x64
-; CHECK:      .word   0                               // 0x0
-; CHECK:      .word   0                               // 0x0
-
+;; Constant pools for function @cold_func.
+; CHECK:       .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_0:
+; CHECK-NEXT:	    .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK-NEXT: .section	.rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_1:
+; CHECK-NEXT:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
+; CHECK-NEXT:	.section	.rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_2:
+; CHECK-NEXT:     .byte   0                               // 0x0
+; CHECK-NEXT:     .byte   4                               // 0x4
+; CHECK-NEXT:     .byte   8                               // 0x8
+; CHECK-NEXT:     .byte   12                              // 0xc
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+
+;; Constant pools for function @unprofiled_func
+; CHECK:	    .section	.rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI1_0:
+; CHECK-NEXT:     .byte   0                               // 0x0
+; CHECK-NEXT:     .byte   4                               // 0x4
+; CHECK-NEXT:     .byte   8                               // 0x8
+; CHECK-NEXT:     .byte   12                              // 0xc
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16 
+; CHECK-NEXT:     .p2align 
+; CHECK-NEXT:   .LCPI1_1: 
+; CHECK-NEXT:     .word 2                                 // 0x2 
+; CHECK-NEXT:     .word 3                                 // 0x3 
+; CHECK-NEXT:     .word 5                                 // 0x5 
+; CHECK-NEXT:     .word 7                                 // 0x7 
+; CHECK-NEXT: .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI1_2:
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   0                               // 0x0
+
+;; Constant pools for function @hot_func
 ; CHECK:      .section        .rodata.cst8.hot,"aM",@progbits,8
-; CHECK: .LCPI2_0:
-; CHECK:      .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
-; CHECK:      .section        .rodata.cst16.hot,"aM",@progbits,16
-; CHECK: .LCPI2_1:
-; CHECK:      .word   442                             // 0x1ba
-; CHECK:      .word   100                             // 0x64
-; CHECK:      .word   0                               // 0x0
-; CHECK:      .word   0                               // 0x0
-
-; CHECK:    .section	.rodata.cst32,"aM",@progbits,32
-; CHECK:    .globl	val
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI2_0:
+; CHECK-NEXT:     .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK-NEXT: .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI2_1:
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:   .LCPI2_2:
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   0                               // 0x0
+
+;; For global variable @val
+;; The section name remains `.rodata.cst32` without a hotness suffix because
+;; the variable has external linkage and is not analyzed. The compiler needs
+;; symbolized data access profiles to annotate such global variables' hotness.
+; CHECK:       .section	.rodata.cst32,"aM",@progbits,32
+; CHECK-NEXT:  .globl	val
 
 define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
@@ -83,14 +111,16 @@ define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = bitcast <8 x i8> %t1 to <4 x i16>
   %t3 = zext <4 x i16> %t2 to <4 x i32>
-  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+  %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
+  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4
   ret <4 x i1> %cmp
 }
 
 define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
-  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
-  ret <4 x i1> %b
+  %b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a
+  %c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %c
 }
 
 @val = unnamed_addr constant i256 1
@@ -107,14 +137,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 {
 
 7:                                                ; preds = %7, %2
   %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
-  %9 = call i32 @rand()
+  %seed_val = load i256, ptr @val
+  %9 = call i32 @seed(i256 %seed_val)
   call void @hot_func(i32 %9)
   %10 = add i32 %8, 1
   %11 = icmp eq i32 %10, 100000
   br i1 %11, label %5, label %7, !prof !18
 }
 
-declare i32 @rand()
+declare i32 @seed(i256)
 declare double @double_func()
 declare <4 x i32> @vector_func()
 declare <16 x i8> @vector_func_16i8()
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
index e39a5d2026dd7..a1f16896a6094 100644
--- a/llvm/test/CodeGen/X86/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -25,42 +25,52 @@ target triple = "x86_64-grtev4-linux-gnu"
 ; RUN:     -unique-section-names=false \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
 
-; CHECK:     .section	.rodata.cst8.hot,"aM",@progbits,8
-; CHECK: .LCPI0_0:
-; CHECK:	   .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
-; CHECK: 	   .section	.rodata.cst8.unlikely,"aM",@progbits,8
-; CHECK: .LCPI0_1:
-; CHECK:	   .quad	0x3eb0000000000000              # double 9.5367431640625E-7
-
-; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
-; CHECK: .LCPI0_2:
-; CHECK:     .quad  0x3fc0000000000000              # double 0.125
-
-; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
-; CHECK: .LCPI1_0:
-; CHECK:     .quad   0x3fc0000000000000              # double 0.125
-
-; CHECK:     .section        .rodata.cst4,"aM",@progbits,4
-; CHECK: .LCPI2_0:
-; CHECK:     .long   0x3e000000              # float 0.125
-
-; CHECK:	   .section	.rodata.cst8.hot,"aM",@progbits,8
-; CHECK: .LCPI3_0:
-; CHECK:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
-; CHECK:     .section        .rodata.cst16.hot,"aM",@progbits,16
-; CHECK: .LCPI3_1:
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK: .LCPI3_2:
-; CHECK:     .long   2147484090                      # 0x800001ba
-; CHECK:     .long   2147483748                      # 0x80000064
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-
-; CHECK:    .section	.rodata.cst32,"aM",@progbits,32
-; CHECK:    .globl	val
+;; For function @cold_func
+; CHECK:       .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT:      .p2align 
+; CHECK-NEXT:    .LCPI0_0:
+; CHECK-NEXT:	     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK-NEXT:  .section	.rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI0_1:
+; CHECK-NEXT:	     .quad	0x3eb0000000000000              # double 9.5367431640625E-7
+; CHECK-NEXT:  .section        .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI0_2:
+; CHECK-NEXT:      .quad  0x3fc0000000000000              # double 0.125
+
+;; For function @unprofiled_func_double
+; CHECK:       .section        .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT:      .p2align       
+; CHECK-NEXT:    .LCPI1_0:
+; CHECK-NEXT:     .quad   0x3fc0000000000000              # double 0.125
+
+;; For function @unprofiled_func_float
+; CHECK:       .section        .rodata.cst4,"aM",@progbits,4
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI2_0:
+; CHECK-NEXT:     .long   0x3e000000              # float 0.125
+
+;; For function @hot_func
+; CHECK:	     .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI3_0:
+; CHECK-NEXT:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK-NEXT:  .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI3_1:
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:    .LCPI3_2:
+; CHECK-NEXT:      .long   2147484090                      # 0x800001ba
+; CHECK-NEXT:      .long   2147483748                      # 0x80000064
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+
+; CHECK:       .section	.rodata.cst32,"aM",@progbits,32
+; CHECK-NEXT:  .globl	val
 
 define double @cold_func(double %x) !prof !16 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
@@ -79,7 +89,6 @@ define float @unprofiled_func_float(float %x) {
   ret float %z
 }
 
-
 define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
   %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
@@ -98,14 +107,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 {
 
 7:                                                ; preds = %7, %2
   %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
-  %9 = call i32 @rand()
+  %seed_val = load i256, ptr @val
+  %9 = call i32 @seed(i256 %seed_val)
   call void @hot_func(i32 %9)
   %10 = add i32 %8, 1
   %11 = icmp eq i32 %10, 100000
   br i1 %11, label %5, label %7, !prof !18
 }
 
-declare i32 @rand()
+declare i32 @seed(i256)
 declare double @double_func()
 declare i32 @func_taking_arbitrary_param(...)
 

>From 99cd5317d963ffa312bd13247e64854ee32c9454 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 26 Mar 2025 13:11:05 -0700
Subject: [PATCH 11/12] clang-format

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index f6d9c55952c52..9ed5d33fd2524 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -58,10 +58,9 @@ class StaticDataSplitter : public MachineFunctionPass {
   // .data.rel.ro} sections.
   bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
 
-    // Returns the constant if the operand refers to a global variable or constant
+  // Returns the constant if the operand refers to a global variable or constant
   // that gets lowered to static data sections. Otherwise, return nullptr.
-  const Constant *getConstant(const MachineOperand &Op,
-                              const TargetMachine &TM,
+  const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
                               const MachineConstantPool *MCP);
 
   // Use profiles to partition static data.
@@ -247,7 +246,8 @@ void StaticDataSplitter::annotateStaticDataWithoutProfiles(
   for (const auto &MBB : MF)
     for (const MachineInstr &I : MBB)
       for (const MachineOperand &Op : I.operands())
-        if (const Constant *C = getConstant(Op, MF.getTarget(), MF.getConstantPool()))
+        if (const Constant *C =
+                getConstant(Op, MF.getTarget(), MF.getConstantPool()))
           SDPI->addConstantProfileCount(C, std::nullopt);
 }
 

>From 5fa795ec3b691d505e604951c330cea57400bb99 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sat, 29 Mar 2025 21:27:37 -0700
Subject: [PATCH 12/12] clang format

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e3b185c7581a3..8e12c5e5439ba 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -60,8 +60,7 @@ class StaticDataSplitter : public MachineFunctionPass {
 
   // Returns the constant if the operand refers to a global variable or constant
   // that gets lowered to static data sections. Otherwise, return nullptr.
-  const Constant *getConstant(const MachineOperand &Op,
-                              const TargetMachine &TM,
+  const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
                               const MachineConstantPool *MCP);
 
   // Use profiles to partition static data.



More information about the llvm-commits mailing list