[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

Mingming Liu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Mar 26 13:11:24 PDT 2025


https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/129781

>From 072c44f0f9272682480cc2837196a906bd694276 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 28 Feb 2025 14:41:56 -0800
Subject: [PATCH 1/4] [CodeGen][StaticDataSplitter]Support constant pool
 partitioning

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |   8 +
 .../CodeGen/TargetLoweringObjectFileImpl.h    |   6 +
 .../llvm/Target/TargetLoweringObjectFile.h    |   7 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  22 ++-
 llvm/lib/CodeGen/StaticDataSplitter.cpp       |  56 +++++--
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  |  35 +++++
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  10 ++
 llvm/lib/Target/TargetLoweringObjectFile.cpp  |  10 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp         |  10 ++
 .../AArch64/constant-pool-partition.ll        | 141 ++++++++++++++++++
 .../CodeGen/X86/constant-pool-partition.ll    | 131 ++++++++++++++++
 11 files changed, 422 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/constant-pool-partition.ll
 create mode 100644 llvm/test/CodeGen/X86/constant-pool-partition.ll

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Provides the profile information for constants.
+  const StaticDataProfileInfo *SDPI = nullptr;
+
+  /// The profile summary information.
+  const ProfileSummaryInfo *PSI = nullptr;
+
   /// Map a basic block section ID to the begin and end symbols of that section
   ///  which determine the section's range.
   struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
                                    const Constant *C,
                                    Align &Alignment) const override;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+                                   const Constant *C, Align &Alignment,
+                                   StringRef SectionSuffix) const override;
+
   MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
                                       const TargetMachine &TM) const override;
 
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
                                            SectionKind Kind, const Constant *C,
                                            Align &Alignment) const;
 
+  /// Similar to the function above, but append \p SectionSuffix to the section
+  /// name.
+  virtual MCSection *getSectionForConstant(const DataLayout &DL,
+                                           SectionKind Kind, const Constant *C,
+                                           Align &Alignment,
+                                           StringRef SectionSuffix) const;
+
   virtual MCSection *
   getSectionForMachineBasicBlock(const Function &F,
                                  const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
     if (!CPE.isMachineConstantPoolEntry())
       C = CPE.Val.ConstVal;
 
-    MCSection *S = getObjFileLowering().getSectionForConstant(
-        getDataLayout(), Kind, C, Alignment);
+    MCSection *S = nullptr;
+    if (TM.Options.EnableStaticDataPartitioning) {
+      SmallString<8> SectionNameSuffix;
+      if (C && SDPI && PSI) {
+        auto Count = SDPI->getConstantProfileCount(C);
+        if (Count) {
+          if (PSI->isHotCount(*Count)) {
+            SectionNameSuffix.append("hot");
+          } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
+            SectionNameSuffix.append("unlikely");
+          }
+        }
+      }
+
+      S = getObjFileLowering().getSectionForConstant(
+          getDataLayout(), Kind, C, Alignment, SectionNameSuffix);
+    } else {
+      S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C,
+                                                     Alignment);
+    }
 
     // The number of sections are small, just do a linear search from the
     // last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index c647c3075d79c..4768c0829ea49 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -10,7 +10,7 @@
 // for the following types of static data:
 // - Jump tables
 // - Module-internal global variables
-// - Constant pools (TODO)
+// - Constant pools
 //
 // For the original RFC of this pass please see
 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
@@ -117,16 +117,17 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
   const TargetMachine &TM = MF.getTarget();
   MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+  const MachineConstantPool *MCP = MF.getConstantPool();
 
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
   // `MachineOperand::isJTI()` to identify jump table operands.
-  // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
-  // in the same loop.
+  // Similarly, `MachineOperand::isCPI()` is used to identify constant pool
+  // usages in the same loop.
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isJTI() && !Op.isGlobal())
+        if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI())
           continue;
 
         std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
@@ -148,7 +149,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
-        } else {
+        } else if (Op.isGlobal()) {
           // Find global variables with local linkage.
           const GlobalVariable *GV =
               getLocalLinkageGlobalVariable(Op.getGlobal());
@@ -159,6 +160,20 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
               !inStaticDataSection(GV, TM))
             continue;
           SDPI->addConstantProfileCount(GV, Count);
+        } else {
+          assert(Op.isCPI() && "Op must be constant pool index in this branch");
+          int CPI = Op.getIndex();
+          if (CPI == -1)
+            continue;
+
+          assert(MCP != nullptr && "Constant pool info is not available.");
+          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+          if (CPE.isMachineConstantPoolEntry())
+            continue;
+
+          const Constant *C = CPE.Val.ConstVal;
+          SDPI->addConstantProfileCount(C, Count);
         }
       }
     }
@@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
     const MachineFunction &MF) {
+  const MachineConstantPool *MCP = MF.getConstantPool();
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isGlobal())
-          continue;
-        const GlobalVariable *GV =
-            getLocalLinkageGlobalVariable(Op.getGlobal());
-        if (!GV || GV->getName().starts_with("llvm.") ||
-            !inStaticDataSection(GV, MF.getTarget()))
+        if (!Op.isGlobal() && !Op.isCPI())
           continue;
-        SDPI->addConstantProfileCount(GV, std::nullopt);
+        if (Op.isGlobal()) {
+          const GlobalVariable *GV =
+              getLocalLinkageGlobalVariable(Op.getGlobal());
+          if (!GV || GV->getName().starts_with("llvm.") ||
+              !inStaticDataSection(GV, MF.getTarget()))
+            continue;
+          SDPI->addConstantProfileCount(GV, std::nullopt);
+        } else {
+          assert(Op.isCPI() && "Op must be constant pool index in this branch");
+          int CPI = Op.getIndex();
+          if (CPI == -1)
+            continue;
+
+          assert(MCP != nullptr && "Constant pool info is not available.");
+          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+          if (CPE.isMachineConstantPoolEntry())
+            continue;
+
+          const Constant *C = CPE.Val.ConstVal;
+          SDPI->addConstantProfileCount(C, std::nullopt);
+        }
       }
     }
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index be2f5fb0b4a79..6cf8a0e9d211f 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
   return DataRelROSection;
 }
 
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+    const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+    StringRef SectionSuffix) const {
+  // Chooses the ELF section for a constant and appends "." + SectionSuffix to
+  // the section name (e.g. ".rodata.cst8.hot"). An empty suffix defers to the
+  // overload that takes no suffix.
+  // TODO: Share code between this function and
+  // MCObjectInfo::initELFMCObjectFileInfo.
+  if (SectionSuffix.empty())
+    return getSectionForConstant(DL, Kind, C, Alignment);
+
+  auto &Context = getContext();
+  const unsigned MergeFlags = ELF::SHF_ALLOC | ELF::SHF_MERGE;
+  if (Kind.isMergeableConst4() && MergeableConst4Section)
+    return Context.getELFSection(".rodata.cst4." + SectionSuffix,
+                                 ELF::SHT_PROGBITS, MergeFlags, 4);
+  if (Kind.isMergeableConst8() && MergeableConst8Section)
+    return Context.getELFSection(".rodata.cst8." + SectionSuffix,
+                                 ELF::SHT_PROGBITS, MergeFlags, 8);
+  if (Kind.isMergeableConst16() && MergeableConst16Section)
+    return Context.getELFSection(".rodata.cst16." + SectionSuffix,
+                                 ELF::SHT_PROGBITS, MergeFlags, 16);
+  if (Kind.isMergeableConst32() && MergeableConst32Section)
+    return Context.getELFSection(".rodata.cst32." + SectionSuffix,
+                                 ELF::SHT_PROGBITS, MergeFlags, 32);
+  if (Kind.isReadOnly())
+    return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC);
+
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return Context.getELFSection(".data.rel.ro." + SectionSuffix,
+                               ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE);
+}
+
 /// Returns a unique section for the given machine basic block.
 MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
     const Function &F, const MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index fc38bfe93c1e0..74a78457e42ec 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -226,6 +226,16 @@ class AArch64AsmPrinter : public AsmPrinter {
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
+    auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+    if (PSIW) {
+      PSI = &PSIW->getPSI();
+    }
+
+    auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+    if (SDPIW) {
+      SDPI = &SDPIW->getStaticDataProfileInfo();
+    }
+
     AArch64FI = MF.getInfo<AArch64FunctionInfo>();
     STI = &MF.getSubtarget<AArch64Subtarget>();
 
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 02c101055d9f3..07f5532bee17e 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
   return DataSection;
 }
 
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+    const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+    StringRef SectionSuffix) const {
+  // Without a suffix this is the plain query, so defer to the overload that
+  // takes no `SectionSuffix`.
+  if (SectionSuffix.empty())
+    return getSectionForConstant(DL, Kind, C, Alignment);
+  report_fatal_error("Unimplemented");
+}
+
 MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock(
     const Function &F, const MachineBasicBlock &MBB,
     const TargetMachine &TM) const {
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 79aa898e18bfa..f58974e79efb9 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -61,6 +62,15 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
 /// runOnMachineFunction - Emit the function body.
 ///
 bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+  if (PSIW) {
+    PSI = &PSIW->getPSI();
+  }
+
+  auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+  if (SDPIW) {
+    SDPI = &SDPIW->getStaticDataProfileInfo();
+  }
   Subtarget = &MF.getSubtarget<X86Subtarget>();
 
   SMShadowTracker.startFunction(MF);
diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
new file mode 100644
index 0000000000000..5d2df59d34317
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+;   constant (e.g., from an unprofiled function, or cold function) should have
+;   `.hot` suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+;   function, constant pools for this constant should not have `.unlikely` suffix.
+
+; CHECK:     .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:	   .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK:     .section	.rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
+; CHECK:	   .section	.rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK:     .byte   0                               // 0x0
+; CHECK:     .byte   4                               // 0x4
+; CHECK:     .byte   8                               // 0x8
+; CHECK:     .byte   12                              // 0xc
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+
+; CHECK:	   .section	.rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK:     .byte   0                               // 0x0
+; CHECK:     .byte   4                               // 0x4
+; CHECK:     .byte   8                               // 0x8
+; CHECK:     .byte   12                              // 0xc
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:     .byte   255                             // 0xff
+; CHECK:      .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI1_1:
+; CHECK:      .word   442                             // 0x1ba
+; CHECK:      .word   100                             // 0x64
+; CHECK:      .word   0                               // 0x0
+; CHECK:      .word   0                               // 0x0
+
+; CHECK:      .section        .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI2_0:
+; CHECK:      .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK:      .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI2_1:
+; CHECK:      .word   442                             // 0x1ba
+; CHECK:      .word   100                             // 0x64
+; CHECK:      .word   0                               // 0x0
+; CHECK:      .word   0                               // 0x0
+
+; CHECK:    .section	.rodata.cst32,"aM",@progbits,32
+; CHECK:    .globl	val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+  %3 = extractelement <2 x i32> %t2, i32 1
+  %sum = add i32 %2, %3
+  %ret = add i32 %sum, %num
+  ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+  %t3 = zext <4 x i16> %t2 to <4 x i32>
+  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+  br label %7
+
+5:                                                ; preds = %7
+  %x = call double @double_func()
+  %a = call <16 x i8> @vector_func_16i8()
+  %b = call <16 x i8> @vector_func_16i8()
+  call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
+  ret i32 0
+
+7:                                                ; preds = %7, %2
+  %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+  %9 = call i32 @rand()
+  call void @hot_func(i32 %9)
+  %10 = add i32 %8, 1
+  %11 = icmp eq i32 %10, 100000
+  br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @rand()
+declare double @double_func()
+declare <4 x i32> @vector_func()
+declare <16 x i8> @vector_func_16i8()
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 3, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
new file mode 100644
index 0000000000000..e39a5d2026dd7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -0,0 +1,131 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from @cold_func first, two functions
+; without profiles secondly, and then @hot_func. Specifically, tests that
+; 1. If a constant is accessed by hot functions, all constant pools for this
+;    constant (e.g., from an unprofiled function, or cold function) should have
+;    .hot suffix.
+; 2. Similarly if a constant is accessed by both cold function and un-profiled
+;    function, constant pools for this constant should not have .unlikely suffix.
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN:     -unique-section-names=true \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN:     -partition-static-data-sections=true -function-sections=false -data-sections=false \
+; RUN:     -unique-section-names=false \
+; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; CHECK:     .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK:	   .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK: 	   .section	.rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK:	   .quad	0x3eb0000000000000              # double 9.5367431640625E-7
+
+; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK:     .quad  0x3fc0000000000000              # double 0.125
+
+; CHECK:     .section        .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK:     .quad   0x3fc0000000000000              # double 0.125
+
+; CHECK:     .section        .rodata.cst4,"aM",@progbits,4
+; CHECK: .LCPI2_0:
+; CHECK:     .long   0x3e000000              # float 0.125
+
+; CHECK:	   .section	.rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI3_0:
+; CHECK:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK:     .section        .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI3_1:
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK: .LCPI3_2:
+; CHECK:     .long   2147484090                      # 0x800001ba
+; CHECK:     .long   2147483748                      # 0x80000064
+; CHECK:     .long   2147483648                      # 0x80000000
+; CHECK:     .long   2147483648                      # 0x80000000
+
+; CHECK:    .section	.rodata.cst32,"aM",@progbits,32
+; CHECK:    .globl	val
+
+define double @cold_func(double %x) !prof !16 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %y = fmul double %x, 0x3EB0000000000000
+  %z = fmul double %y, 0x3fc0000000000000
+  ret double %z
+}
+
+define double @unprofiled_func_double(double %x) {
+  %z = fmul double %x, 0x3fc0000000000000
+  ret double %z
+}
+
+define float @unprofiled_func_float(float %x) {
+  %z = fmul float %x, 0x3fc0000000000000
+  ret float %z
+}
+
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+  %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+  br label %7
+
+5:                                                ; preds = %7
+  %x = call double @double_func()
+  call void @cold_func(double %x)
+  ret i32 0
+
+7:                                                ; preds = %7, %2
+  %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+  %9 = call i32 @rand()
+  call void @hot_func(i32 %9)
+  %10 = add i32 %8, 1
+  %11 = icmp eq i32 %10, 100000
+  br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @rand()
+declare double @double_func()
+declare i32 @func_taking_arbitrary_param(...)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 1, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}

>From 9fae47c06f8d559bd90ddec9be6b0cd34131bbd6 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Mar 2025 16:15:15 -0800
Subject: [PATCH 2/4] resolve comments

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  4 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 39 ++++----
 llvm/lib/CodeGen/StaticDataSplitter.cpp       | 92 +++++++++----------
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  | 16 ++--
 4 files changed, 73 insertions(+), 78 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 2018f411be796..bd0f5ada805ab 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -338,6 +338,10 @@ class AsmPrinter : public MachineFunctionPass {
     DwarfUsesRelocationsAcrossSections = Enable;
   }
 
+  // Returns a section suffix (hot or unlikely) for the constant if profiles
+  // are available. Returns empty string otherwise.
+  StringRef getConstantSectionSuffix(const Constant *C) const;
+
   //===------------------------------------------------------------------===//
   // XRay instrumentation implementation.
   //===------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 60018afe2f8a7..bec3e718bd11b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2769,6 +2769,23 @@ namespace {
 
 } // end anonymous namespace
 
+StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {
+  // Only string literals are returned: a StringRef into a function-local
+  // buffer would dangle as soon as this function returns.
+  if (!TM.Options.EnableStaticDataPartitioning || !C || !SDPI || !PSI)
+    return "";
+  // Without a recorded profile count no suffix is chosen.
+  const auto Count = SDPI->getConstantProfileCount(C);
+  if (!Count)
+    return "";
+  if (PSI->isHotCount(*Count))
+    return "hot";
+  // A cold count is trusted only if SDPI reports no unknown count for C.
+  if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C))
+    return "unlikely";
+  return "";
+}
+
 /// EmitConstantPool - Print to the current output stream assembly
 /// representations of the constants in the constant pool MCP. This is
 /// used to print out constants which have been "spilled to memory" by
@@ -2791,26 +2808,8 @@ void AsmPrinter::emitConstantPool() {
     if (!CPE.isMachineConstantPoolEntry())
       C = CPE.Val.ConstVal;
 
-    MCSection *S = nullptr;
-    if (TM.Options.EnableStaticDataPartitioning) {
-      SmallString<8> SectionNameSuffix;
-      if (C && SDPI && PSI) {
-        auto Count = SDPI->getConstantProfileCount(C);
-        if (Count) {
-          if (PSI->isHotCount(*Count)) {
-            SectionNameSuffix.append("hot");
-          } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
-            SectionNameSuffix.append("unlikely");
-          }
-        }
-      }
-
-      S = getObjFileLowering().getSectionForConstant(
-          getDataLayout(), Kind, C, Alignment, SectionNameSuffix);
-    } else {
-      S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C,
-                                                     Alignment);
-    }
+    MCSection *S = getObjFileLowering().getSectionForConstant(
+        getDataLayout(), Kind, C, Alignment, getConstantSectionSuffix(C));
 
     // The number of sections are small, just do a linear search from the
     // last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 4768c0829ea49..df5ae7c2e8369 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -69,6 +69,11 @@ class StaticDataSplitter : public MachineFunctionPass {
 
   void annotateStaticDataWithoutProfiles(const MachineFunction &MF);
 
+  // Returns the constant if the operand refers to a global variable or constant
+  // that gets lowered to static data sections. Otherwise, return nullptr.
+  const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
+                              const MachineConstantPool *MCP);
+
 public:
   static char ID;
 
@@ -112,12 +117,42 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+const Constant *
+StaticDataSplitter::getConstant(const MachineOperand &Op,
+                                const TargetMachine &TM,
+                                const MachineConstantPool *MCP) {
+  if (!Op.isGlobal() && !Op.isCPI())
+    return nullptr;
+
+  if (Op.isGlobal()) {
+    // Find global variables with local linkage.
+    const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
+    // Skip 'special' global variables conservatively because they are
+    // often handled specially, and skip those not in static data
+    // sections.
+    if (!GV || GV->getName().starts_with("llvm.") ||
+        !inStaticDataSection(GV, TM))
+      return nullptr;
+    return GV;
+  }
+  assert(Op.isCPI() && "Op must be constant pool index in this branch");
+  int CPI = Op.getIndex();
+  if (CPI == -1)
+    return nullptr;
+
+  assert(MCP != nullptr && "Constant pool info is not available.");
+  const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+  if (CPE.isMachineConstantPoolEntry())
+    return nullptr;
+
+  return CPE.Val.ConstVal;
+}
+
 bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
   int NumChangedJumpTables = 0;
 
-  const TargetMachine &TM = MF.getTarget();
   MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  const MachineConstantPool *MCP = MF.getConstantPool();
 
   // Jump table could be used by either terminating instructions or
   // non-terminating ones, so we walk all instructions and use
@@ -149,30 +184,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
 
           if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
             ++NumChangedJumpTables;
-        } else if (Op.isGlobal()) {
-          // Find global variables with local linkage.
-          const GlobalVariable *GV =
-              getLocalLinkageGlobalVariable(Op.getGlobal());
-          // Skip 'special' global variables conservatively because they are
-          // often handled specially, and skip those not in static data
-          // sections.
-          if (!GV || GV->getName().starts_with("llvm.") ||
-              !inStaticDataSection(GV, TM))
-            continue;
-          SDPI->addConstantProfileCount(GV, Count);
-        } else {
-          assert(Op.isCPI() && "Op must be constant pool index in this branch");
-          int CPI = Op.getIndex();
-          if (CPI == -1)
-            continue;
-
-          assert(MCP != nullptr && "Constant pool info is not available.");
-          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
-
-          if (CPE.isMachineConstantPoolEntry())
-            continue;
-
-          const Constant *C = CPE.Val.ConstVal;
+        } else if (const Constant *C =
+                       getConstant(Op, MF.getTarget(), MF.getConstantPool())) {
           SDPI->addConstantProfileCount(C, Count);
         }
       }
@@ -218,34 +231,13 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
 
 void StaticDataSplitter::annotateStaticDataWithoutProfiles(
     const MachineFunction &MF) {
-  const MachineConstantPool *MCP = MF.getConstantPool();
   for (const auto &MBB : MF) {
     for (const MachineInstr &I : MBB) {
       for (const MachineOperand &Op : I.operands()) {
-        if (!Op.isGlobal() && !Op.isCPI())
-          continue;
-        if (Op.isGlobal()) {
-          const GlobalVariable *GV =
-              getLocalLinkageGlobalVariable(Op.getGlobal());
-          if (!GV || GV->getName().starts_with("llvm.") ||
-              !inStaticDataSection(GV, MF.getTarget()))
-            continue;
-          SDPI->addConstantProfileCount(GV, std::nullopt);
-        } else {
-          assert(Op.isCPI() && "Op must be constant pool index in this branch");
-          int CPI = Op.getIndex();
-          if (CPI == -1)
-            continue;
-
-          assert(MCP != nullptr && "Constant pool info is not available.");
-          const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
-
-          if (CPE.isMachineConstantPoolEntry())
-            continue;
-
-          const Constant *C = CPE.Val.ConstVal;
+        const Constant *C =
+            getConstant(Op, MF.getTarget(), MF.getConstantPool());
+        if (C)
           SDPI->addConstantProfileCount(C, std::nullopt);
-        }
       }
     }
   }
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6cf8a0e9d211f..ad9c7f099df56 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1074,35 +1074,35 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
 
 MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
     const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
-    StringRef SectionPrefix) const {
+    StringRef SectionSuffix) const {
   // TODO: Share code between this function and
   // MCObjectInfo::initELFMCObjectFileInfo.
-  if (SectionPrefix.empty())
+  if (SectionSuffix.empty())
     return getSectionForConstant(DL, Kind, C, Alignment);
 
   auto &Context = getContext();
   if (Kind.isMergeableConst4() && MergeableConst4Section)
-    return Context.getELFSection(".rodata.cst4." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst4." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
   if (Kind.isMergeableConst8() && MergeableConst8Section)
-    return Context.getELFSection(".rodata.cst8." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst8." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
   if (Kind.isMergeableConst16() && MergeableConst16Section)
-    return Context.getELFSection(".rodata.cst16." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst16." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
   if (Kind.isMergeableConst32() && MergeableConst32Section)
-    return Context.getELFSection(".rodata.cst32." + SectionPrefix,
+    return Context.getELFSection(".rodata.cst32." + SectionSuffix,
                                  ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
   if (Kind.isReadOnly())
-    return Context.getELFSection(".rodata" + SectionPrefix, ELF::SHT_PROGBITS,
+    return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
                                  ELF::SHF_ALLOC);
 
   assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
-  return Context.getELFSection(".data.rel.ro" + SectionPrefix,
+  return Context.getELFSection(".data.rel.ro." + SectionSuffix,
                                ELF::SHT_PROGBITS,
                                ELF::SHF_ALLOC | ELF::SHF_WRITE);
 }

>From 4f91e5c74afbe35efface1031ad8ae75c7fabe1e Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 26 Mar 2025 11:25:19 -0700
Subject: [PATCH 3/4] resolve comments

---
 .../AArch64/constant-pool-partition.ll        | 133 +++++++++++-------
 .../CodeGen/X86/constant-pool-partition.ll    |  88 +++++++-----
 2 files changed, 131 insertions(+), 90 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
index 5d2df59d34317..74b3632f39a7e 100644
--- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -14,56 +14,84 @@
 ; secondly, and then hot_func. Specifically, tests that
 ; - If a constant is accessed by hot functions, all constant pools for this
 ;   constant (e.g., from an unprofiled function, or cold function) should have
-;   `.hot` suffix.
+;   `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
+;   @hot_func, so both constant pool entries emitted for it (under labels
+;   LCPI0_0 and LCPI2_0) have the `.hot` suffix.
 ; - Similarly if a constant is accessed by both cold function and un-profiled
 ;   function, constant pools for this constant should not have `.unlikely` suffix.
 
-; CHECK:     .section	.rodata.cst8.hot,"aM", at progbits,8
-; CHECK: .LCPI0_0:
-; CHECK:	   .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
-; CHECK:     .section	.rodata.cst8.unlikely,"aM", at progbits,8
-; CHECK: .LCPI0_1:
-; CHECK:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
-; CHECK:	   .section	.rodata.cst8,"aM", at progbits,8
-; CHECK: .LCPI0_2:
-; CHECK:     .byte   0                               // 0x0
-; CHECK:     .byte   4                               // 0x4
-; CHECK:     .byte   8                               // 0x8
-; CHECK:     .byte   12                              // 0xc
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-
-; CHECK:	   .section	.rodata.cst8,"aM", at progbits,8
-; CHECK: .LCPI1_0:
-; CHECK:     .byte   0                               // 0x0
-; CHECK:     .byte   4                               // 0x4
-; CHECK:     .byte   8                               // 0x8
-; CHECK:     .byte   12                              // 0xc
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:     .byte   255                             // 0xff
-; CHECK:      .section        .rodata.cst16.hot,"aM", at progbits,16
-; CHECK: .LCPI1_1:
-; CHECK:      .word   442                             // 0x1ba
-; CHECK:      .word   100                             // 0x64
-; CHECK:      .word   0                               // 0x0
-; CHECK:      .word   0                               // 0x0
-
+;; Constant pools for function @cold_func.
+; CHECK:       .section	.rodata.cst8.hot,"aM", at progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_0:
+; CHECK-NEXT:	    .xword	0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK-NEXT: .section	.rodata.cst8.unlikely,"aM", at progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_1:
+; CHECK-NEXT:     .xword 0x3fe5eb851eb851ec              // double 0.68500000000000005
+; CHECK-NEXT:	.section	.rodata.cst8,"aM", at progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI0_2:
+; CHECK-NEXT:     .byte   0                               // 0x0
+; CHECK-NEXT:     .byte   4                               // 0x4
+; CHECK-NEXT:     .byte   8                               // 0x8
+; CHECK-NEXT:     .byte   12                              // 0xc
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+
+;; Constant pools for function @unprofiled_func
+; CHECK:	    .section	.rodata.cst8,"aM", at progbits,8
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI1_0:
+; CHECK-NEXT:     .byte   0                               // 0x0
+; CHECK-NEXT:     .byte   4                               // 0x4
+; CHECK-NEXT:     .byte   8                               // 0x8
+; CHECK-NEXT:     .byte   12                              // 0xc
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT:     .byte   255                             // 0xff
+; CHECK-NEXT: .section .rodata.cst16,"aM", at progbits,16 
+; CHECK-NEXT:     .p2align 
+; CHECK-NEXT:   .LCPI1_1: 
+; CHECK-NEXT:     .word 2                                 // 0x2 
+; CHECK-NEXT:     .word 3                                 // 0x3 
+; CHECK-NEXT:     .word 5                                 // 0x5 
+; CHECK-NEXT:     .word 7                                 // 0x7 
+; CHECK-NEXT: .section        .rodata.cst16.hot,"aM", at progbits,16
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI1_2:
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   0                               // 0x0
+
+;; Constant pools for function @hot_func
 ; CHECK:      .section        .rodata.cst8.hot,"aM", at progbits,8
-; CHECK: .LCPI2_0:
-; CHECK:      .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
-; CHECK:      .section        .rodata.cst16.hot,"aM", at progbits,16
-; CHECK: .LCPI2_1:
-; CHECK:      .word   442                             // 0x1ba
-; CHECK:      .word   100                             // 0x64
-; CHECK:      .word   0                               // 0x0
-; CHECK:      .word   0                               // 0x0
-
-; CHECK:    .section	.rodata.cst32,"aM", at progbits,32
-; CHECK:    .globl	val
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI2_0:
+; CHECK-NEXT:     .xword  0x3fe5c28f5c28f5c3              // double 0.68000000000000005
+; CHECK-NEXT: .section        .rodata.cst16.hot,"aM", at progbits,16
+; CHECK-NEXT:     .p2align
+; CHECK-NEXT:   .LCPI2_1:
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:   .LCPI2_2:
+; CHECK-NEXT:     .word   442                             // 0x1ba
+; CHECK-NEXT:     .word   100                             // 0x64
+; CHECK-NEXT:     .word   0                               // 0x0
+; CHECK-NEXT:     .word   0                               // 0x0
+
+;; For global variable @val
+;; The section name remains `.rodata.cst32` without a hotness suffix because
+;; the variable has external linkage and is not analyzed. The compiler needs
+;; symbolized data access profiles to annotate such global variables' hotness.
+; CHECK:       .section	.rodata.cst32,"aM", at progbits,32
+; CHECK-NEXT:  .globl	val
 
 define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
@@ -83,14 +111,16 @@ define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = bitcast <8 x i8> %t1 to <4 x i16>
   %t3 = zext <4 x i16> %t2 to <4 x i32>
-  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+  %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
+  %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4
   ret <4 x i1> %cmp
 }
 
 define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
-  %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
-  ret <4 x i1> %b
+  %b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a
+  %c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0>
+  ret <4 x i1> %c
 }
 
 @val = unnamed_addr constant i256 1
@@ -107,14 +137,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 {
 
 7:                                                ; preds = %7, %2
   %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
-  %9 = call i32 @rand()
+  %seed_val = load i256, ptr @val
+  %9 = call i32 @seed(i256 %seed_val)
   call void @hot_func(i32 %9)
   %10 = add i32 %8, 1
   %11 = icmp eq i32 %10, 100000
   br i1 %11, label %5, label %7, !prof !18
 }
 
-declare i32 @rand()
+declare i32 @seed(i256)
 declare double @double_func()
 declare <4 x i32> @vector_func()
 declare <16 x i8> @vector_func_16i8()
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
index e39a5d2026dd7..a1f16896a6094 100644
--- a/llvm/test/CodeGen/X86/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -25,42 +25,52 @@ target triple = "x86_64-grtev4-linux-gnu"
 ; RUN:     -unique-section-names=false \
 ; RUN:     %s -o - 2>&1 | FileCheck %s --dump-input=always
 
-; CHECK:     .section	.rodata.cst8.hot,"aM", at progbits,8
-; CHECK: .LCPI0_0:
-; CHECK:	   .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
-; CHECK: 	   .section	.rodata.cst8.unlikely,"aM", at progbits,8
-; CHECK: .LCPI0_1:
-; CHECK:	   .quad	0x3eb0000000000000              # double 9.5367431640625E-7
-
-; CHECK:     .section        .rodata.cst8,"aM", at progbits,8
-; CHECK: .LCPI0_2:
-; CHECK:     .quad  0x3fc0000000000000              # double 0.125
-
-; CHECK:     .section        .rodata.cst8,"aM", at progbits,8
-; CHECK: .LCPI1_0:
-; CHECK:     .quad   0x3fc0000000000000              # double 0.125
-
-; CHECK:     .section        .rodata.cst4,"aM", at progbits,4
-; CHECK: .LCPI2_0:
-; CHECK:     .long   0x3e000000              # float 0.125
-
-; CHECK:	   .section	.rodata.cst8.hot,"aM", at progbits,8
-; CHECK: .LCPI3_0:
-; CHECK:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
-; CHECK:     .section        .rodata.cst16.hot,"aM", at progbits,16
-; CHECK: .LCPI3_1:
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK: .LCPI3_2:
-; CHECK:     .long   2147484090                      # 0x800001ba
-; CHECK:     .long   2147483748                      # 0x80000064
-; CHECK:     .long   2147483648                      # 0x80000000
-; CHECK:     .long   2147483648                      # 0x80000000
-
-; CHECK:    .section	.rodata.cst32,"aM", at progbits,32
-; CHECK:    .globl	val
+;; For function @cold_func
+; CHECK:       .section	.rodata.cst8.hot,"aM", at progbits,8
+; CHECK-NEXT:      .p2align 
+; CHECK-NEXT:    .LCPI0_0:
+; CHECK-NEXT:	     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK-NEXT:  .section	.rodata.cst8.unlikely,"aM", at progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI0_1:
+; CHECK-NEXT:	     .quad	0x3eb0000000000000              # double 9.5367431640625E-7
+; CHECK-NEXT:  .section        .rodata.cst8,"aM", at progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI0_2:
+; CHECK-NEXT:      .quad  0x3fc0000000000000              # double 0.125
+
+;; For function @unprofiled_func_double
+; CHECK:       .section        .rodata.cst8,"aM", at progbits,8
+; CHECK-NEXT:      .p2align       
+; CHECK-NEXT:    .LCPI1_0:
+; CHECK-NEXT:     .quad   0x3fc0000000000000              # double 0.125
+
+;; For function @unprofiled_func_float
+; CHECK:       .section        .rodata.cst4,"aM", at progbits,4
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI2_0:
+; CHECK-NEXT:     .long   0x3e000000              # float 0.125
+
+;; For function @hot_func
+; CHECK:	     .section	.rodata.cst8.hot,"aM", at progbits,8
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI3_0:
+; CHECK-NEXT:     .quad	0x3fe5c28f5c28f5c3              # double 0.68000000000000005
+; CHECK-NEXT:  .section        .rodata.cst16.hot,"aM", at progbits,16
+; CHECK-NEXT:      .p2align
+; CHECK-NEXT:    .LCPI3_1:
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:    .LCPI3_2:
+; CHECK-NEXT:      .long   2147484090                      # 0x800001ba
+; CHECK-NEXT:      .long   2147483748                      # 0x80000064
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+; CHECK-NEXT:      .long   2147483648                      # 0x80000000
+
+; CHECK:       .section	.rodata.cst32,"aM", at progbits,32
+; CHECK-NEXT:  .globl	val
 
 define double @cold_func(double %x) !prof !16 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
@@ -79,7 +89,6 @@ define float @unprofiled_func_float(float %x) {
   ret float %z
 }
 
-
 define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
   %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
   %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
@@ -98,14 +107,15 @@ define i32 @main(i32 %0, ptr %1) !prof !16 {
 
 7:                                                ; preds = %7, %2
   %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
-  %9 = call i32 @rand()
+  %seed_val = load i256, ptr @val
+  %9 = call i32 @seed(i256 %seed_val)
   call void @hot_func(i32 %9)
   %10 = add i32 %8, 1
   %11 = icmp eq i32 %10, 100000
   br i1 %11, label %5, label %7, !prof !18
 }
 
-declare i32 @rand()
+declare i32 @seed(i256)
 declare double @double_func()
 declare i32 @func_taking_arbitrary_param(...)
 

>From 99cd5317d963ffa312bd13247e64854ee32c9454 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 26 Mar 2025 13:11:05 -0700
Subject: [PATCH 4/4] clang-format

---
 llvm/lib/CodeGen/StaticDataSplitter.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index f6d9c55952c52..9ed5d33fd2524 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -58,10 +58,9 @@ class StaticDataSplitter : public MachineFunctionPass {
   // .data.rel.ro} sections.
   bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
 
-    // Returns the constant if the operand refers to a global variable or constant
+  // Returns the constant if the operand refers to a global variable or constant
   // that gets lowered to static data sections. Otherwise, return nullptr.
-  const Constant *getConstant(const MachineOperand &Op,
-                              const TargetMachine &TM,
+  const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
                               const MachineConstantPool *MCP);
 
   // Use profiles to partition static data.
@@ -247,7 +246,8 @@ void StaticDataSplitter::annotateStaticDataWithoutProfiles(
   for (const auto &MBB : MF)
     for (const MachineInstr &I : MBB)
       for (const MachineOperand &Op : I.operands())
-        if (const Constant *C = getConstant(Op, MF.getTarget(), MF.getConstantPool()))
+        if (const Constant *C =
+                getConstant(Op, MF.getTarget(), MF.getConstantPool()))
           SDPI->addConstantProfileCount(C, std::nullopt);
 }
 



More information about the llvm-branch-commits mailing list