[llvm] 9747bb1 - [CodeGen][StaticDataSplitter]Support constant pool partitioning (#129781)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 29 22:08:00 PDT 2025
Author: Mingming Liu
Date: 2025-03-29T22:07:56-07:00
New Revision: 9747bb182f430bb1bd3525b7f42e88df626e28e5
URL: https://github.com/llvm/llvm-project/commit/9747bb182f430bb1bd3525b7f42e88df626e28e5
DIFF: https://github.com/llvm/llvm-project/commit/9747bb182f430bb1bd3525b7f42e88df626e28e5.diff
LOG: [CodeGen][StaticDataSplitter]Support constant pool partitioning (#129781)
This is a follow-up patch of
https://github.com/llvm/llvm-project/pull/125756
In this PR, the static-data-splitter pass produces the aggregated profile
counts of constants for constant pools in a global state
(`StaticDataProfileInfo`), and the asm printer consumes the profile counts to
produce `.hot` or `.unlikely` prefixes.
This implementation covers both the x86 and aarch64 asm printers.
Added:
llvm/test/CodeGen/AArch64/constant-pool-partition.ll
llvm/test/CodeGen/X86/constant-pool-partition.ll
Modified:
llvm/include/llvm/CodeGen/AsmPrinter.h
llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
llvm/include/llvm/Target/TargetLoweringObjectFile.h
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
llvm/lib/CodeGen/StaticDataSplitter.cpp
llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
llvm/lib/Target/TargetLoweringObjectFile.cpp
llvm/lib/Target/X86/X86AsmPrinter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 4dd45a1a7774d..16363fbaa4f9a 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
/// default, this is equal to CurrentFnSym.
MCSymbol *CurrentFnSymForSize = nullptr;
+ /// Provides the profile information for constants.
+ const StaticDataProfileInfo *SDPI = nullptr;
+
+ /// The profile summary information.
+ const ProfileSummaryInfo *PSI = nullptr;
+
/// Map a basic block section ID to the begin and end symbols of that section
/// which determine the section's range.
struct MBBSectionRange {
@@ -330,6 +338,10 @@ class AsmPrinter : public MachineFunctionPass {
DwarfUsesRelocationsAcrossSections = Enable;
}
+ /// Returns a section suffix (hot or unlikely) for the constant if profiles
+ /// are available. Returns empty string otherwise.
+ StringRef getConstantSectionSuffix(const Constant *C) const;
+
//===------------------------------------------------------------------===//
// XRay instrumentation implementation.
//===------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 7c929262f6823..8b0e5798d1b61 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -66,6 +66,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
const Constant *C,
Align &Alignment) const override;
+ /// Similar to the function above, but append \p SectionSuffix to the section
+ /// name.
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C, Align &Alignment,
+ StringRef SectionSuffix) const override;
+
MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 9fc09bb7db6c2..47617424a9688 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
SectionKind Kind, const Constant *C,
Align &Alignment) const;
+ /// Similar to the function above, but append \p SectionSuffix to the section
+ /// name.
+ virtual MCSection *getSectionForConstant(const DataLayout &DL,
+ SectionKind Kind, const Constant *C,
+ Align &Alignment,
+ StringRef SectionSuffix) const;
+
virtual MCSection *
getSectionForMachineBasicBlock(const Function &F,
const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c626202753824..2d76aa5488333 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2769,6 +2769,13 @@ namespace {
} // end anonymous namespace
+StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {
+ if (TM.Options.EnableStaticDataPartitioning && C && SDPI && PSI)
+ return SDPI->getConstantSectionPrefix(C, PSI);
+
+ return "";
+}
+
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
@@ -2792,7 +2799,7 @@ void AsmPrinter::emitConstantPool() {
C = CPE.Val.ConstVal;
MCSection *S = getObjFileLowering().getSectionForConstant(
- getDataLayout(), Kind, C, Alignment);
+ getDataLayout(), Kind, C, Alignment, getConstantSectionSuffix(C));
// The number of sections are small, just do a linear search from the
// last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 60501b4495082..8e12c5e5439ba 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -10,7 +10,7 @@
// for the following types of static data:
// - Jump tables
// - Module-internal global variables
-// - Constant pools (TODO)
+// - Constant pools
//
// For the original RFC of this pass please see
// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
@@ -60,8 +60,8 @@ class StaticDataSplitter : public MachineFunctionPass {
// Returns the constant if the operand refers to a global variable or constant
// that gets lowered to static data sections. Otherwise, return nullptr.
- const Constant *getConstant(const MachineOperand &Op,
- const TargetMachine &TM);
+ const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM,
+ const MachineConstantPool *MCP);
// Use profiles to partition static data.
bool partitionStaticDataWithProfiles(MachineFunction &MF);
@@ -89,8 +89,11 @@ class StaticDataSplitter : public MachineFunctionPass {
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<StaticDataProfileInfoWrapperPass>();
- // This pass does not modify the CFG.
- AU.setPreservesCFG();
+ // This pass does not modify any required analysis results except
+ // StaticDataProfileInfoWrapperPass, but StaticDataProfileInfoWrapperPass
+ // is made an immutable pass that it won't be re-scheduled by pass manager
+ // anyway. So mark setPreservesAll() here for faster compile time.
+ AU.setPreservesAll();
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -119,40 +122,63 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op,
- const TargetMachine &TM) {
- if (!Op.isGlobal())
+const Constant *
+StaticDataSplitter::getConstant(const MachineOperand &Op,
+ const TargetMachine &TM,
+ const MachineConstantPool *MCP) {
+ if (!Op.isGlobal() && !Op.isCPI())
return nullptr;
- // Find global variables with local linkage.
- const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
- // Skip 'llvm.'-prefixed global variables conservatively because they are
- // often handled specially, and skip those not in static data sections.
- if (!GV || GV->getName().starts_with("llvm.") ||
- !inStaticDataSection(*GV, TM))
+ if (Op.isGlobal()) {
+ // Find global variables with local linkage.
+ const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
+ // Skip 'llvm.'-prefixed global variables conservatively because they are
+ // often handled specially, and skip those not in static data
+ // sections.
+ if (!GV || GV->getName().starts_with("llvm.") ||
+ !inStaticDataSection(*GV, TM))
+ return nullptr;
+ return GV;
+ }
+ assert(Op.isCPI() && "Op must be constant pool index in this branch");
+ int CPI = Op.getIndex();
+ if (CPI == -1)
+ return nullptr;
+
+ assert(MCP != nullptr && "Constant pool info is not available.");
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+ if (CPE.isMachineConstantPoolEntry())
return nullptr;
- return GV;
+
+ return CPE.Val.ConstVal;
}
bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
- int NumChangedJumpTables = 0;
+ // If any of the static data (jump tables, global variables, constant pools)
+ // are captured by the analysis, set `Changed` to true. Note this pass won't
+ // invalidate any analysis pass (see `getAnalysisUsage` above), so the main
+ // purpose of tracking and conveying the change (to pass manager) is
+ // informative as opposed to invalidating any analysis results. As an example
+ // of where this information is useful, `PMDataManager::dumpPassInfo` will
+ // only dump pass info if a local change happens, otherwise a pass appears as
+ // "skipped".
+ bool Changed = false;
- const TargetMachine &TM = MF.getTarget();
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
// Jump table could be used by either terminating instructions or
// non-terminating ones, so we walk all instructions and use
// `MachineOperand::isJTI()` to identify jump table operands.
- // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
- // in the same loop.
+ // Similarly, `MachineOperand::isCPI()` is used to identify constant pool
+ // usages in the same loop.
for (const auto &MBB : MF) {
+ std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
for (const MachineInstr &I : MBB) {
for (const MachineOperand &Op : I.operands()) {
- if (!Op.isJTI() && !Op.isGlobal())
+ if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI())
continue;
- std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
-
if (Op.isJTI()) {
assert(MJTI != nullptr && "Jump table info is not available.");
const int JTI = Op.getIndex();
@@ -168,15 +194,16 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
if (Count && PSI->isColdCount(*Count))
Hotness = MachineFunctionDataHotness::Cold;
- if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
- ++NumChangedJumpTables;
- } else if (const Constant *C = getConstant(Op, TM)) {
+ Changed |= MJTI->updateJumpTableEntryHotness(JTI, Hotness);
+ } else if (const Constant *C =
+ getConstant(Op, MF.getTarget(), MF.getConstantPool())) {
SDPI->addConstantProfileCount(C, Count);
+ Changed = true;
}
}
}
}
- return NumChangedJumpTables > 0;
+ return Changed;
}
const GlobalVariable *
@@ -218,7 +245,8 @@ void StaticDataSplitter::annotateStaticDataWithoutProfiles(
for (const auto &MBB : MF)
for (const MachineInstr &I : MBB)
for (const MachineOperand &Op : I.operands())
- if (const Constant *C = getConstant(Op, MF.getTarget()))
+ if (const Constant *C =
+ getConstant(Op, MF.getTarget(), MF.getConstantPool()))
SDPI->addConstantProfileCount(C, std::nullopt);
}
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index dd6d85e3662db..4c20c5dc74d9a 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1068,6 +1068,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
return DataRelROSection;
}
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+ StringRef SectionSuffix) const {
+ // TODO: Share code between this function and
+ // MCObjectInfo::initELFMCObjectFileInfo.
+ if (SectionSuffix.empty())
+ return getSectionForConstant(DL, Kind, C, Alignment);
+
+ auto &Context = getContext();
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return Context.getELFSection(".rodata.cst4." + SectionSuffix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return Context.getELFSection(".rodata.cst8." + SectionSuffix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return Context.getELFSection(".rodata.cst16." + SectionSuffix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
+ if (Kind.isMergeableConst32() && MergeableConst32Section)
+ return Context.getELFSection(".rodata.cst32." + SectionSuffix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
+ if (Kind.isReadOnly())
+ return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return Context.getELFSection(".data.rel.ro." + SectionSuffix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+}
+
/// Returns a unique section for the given machine basic block.
MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
const Function &F, const MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index ff1aee9bda6e5..d29a72a4f6884 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -226,6 +226,12 @@ class AArch64AsmPrinter : public AsmPrinter {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>())
+ PSI = &PSIW->getPSI();
+ if (auto *SDPIW =
+ getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>())
+ SDPI = &SDPIW->getStaticDataProfileInfo();
+
AArch64FI = MF.getInfo<AArch64FunctionInfo>();
STI = &MF.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index cab9bc8678a58..0920c3345ecf3 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -385,6 +385,18 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
return DataSection;
}
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+ StringRef SectionPrefix) const {
+ // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it
+ // is empty.
+ if (SectionPrefix.empty())
+ return getSectionForConstant(DL, Kind, C, Alignment);
+ report_fatal_error(
+ "TargetLoweringObjectFile::getSectionForConstant that "
+ "accepts SectionPrefix is not implemented for the object file format");
+}
+
MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock(
const Function &F, const MachineBasicBlock &MBB,
const TargetMachine &TM) const {
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 79aa898e18bfa..a227afe37d737 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -61,6 +62,11 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ if (auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>())
+ PSI = &PSIW->getPSI();
+ if (auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>())
+ SDPI = &SDPIW->getStaticDataProfileInfo();
+
Subtarget = &MF.getSubtarget<X86Subtarget>();
SMShadowTracker.startFunction(MF);
diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
new file mode 100644
index 0000000000000..ab627b02a1bc7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+; constant (e.g., from an unprofiled function, or cold function) should have
+; `.hot` suffix. For instance, double 0.68 is seen by both @cold_func and
+; @hot_func, so two CPI emits (under label LCPI0_0 and LCPI2_0) have `.hot`
+; suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+; function, constant pools for this constant should not have `.unlikely` suffix.
+
+;; Constant pools for function @cold_func.
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_1:
+; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
+; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_2:
+; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 8 // 0x8
+; CHECK-NEXT: .byte 12 // 0xc
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+
+;; Constant pools for function @unprofiled_func
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI1_0:
+; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 8 // 0x8
+; CHECK-NEXT: .byte 12 // 0xc
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .byte 255 // 0xff
+; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI1_1:
+; CHECK-NEXT: .word 2 // 0x2
+; CHECK-NEXT: .word 3 // 0x3
+; CHECK-NEXT: .word 5 // 0x5
+; CHECK-NEXT: .word 7 // 0x7
+; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI1_2:
+; CHECK-NEXT: .word 442 // 0x1ba
+; CHECK-NEXT: .word 100 // 0x64
+; CHECK-NEXT: .word 0 // 0x0
+; CHECK-NEXT: .word 0 // 0x0
+
+;; Constant pools for function @hot_func
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI2_0:
+; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI2_1:
+; CHECK-NEXT: .word 0 // 0x0
+; CHECK-NEXT: .word 100 // 0x64
+; CHECK-NEXT: .word 0 // 0x0
+; CHECK-NEXT: .word 442 // 0x1ba
+; CHECK-NEXT: .LCPI2_2:
+; CHECK-NEXT: .word 442 // 0x1ba
+; CHECK-NEXT: .word 100 // 0x64
+; CHECK-NEXT: .word 0 // 0x0
+; CHECK-NEXT: .word 0 // 0x0
+
+;; For global variable @val
+;; The section name remains `.rodata.cst32` without hotness prefix because
+;; the variable has external linkage and not analyzed. Compiler need symbolized
+;; data access profiles to annotate such global variables' hotness.
+; CHECK: .section .rodata.cst32,"aM",@progbits,32
+; CHECK-NEXT: .globl val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+ %3 = extractelement <2 x i32> %t2, i32 1
+ %sum = add i32 %2, %3
+ %ret = add i32 %sum, %num
+ ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+ %t3 = zext <4 x i16> %t2 to <4 x i32>
+ %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
+ %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t4
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %b = add <4 x i32> <i32 0, i32 100, i32 0, i32 442>, %a
+ %c = icmp ule <4 x i32> %b, <i32 442, i32 100, i32 0, i32 0>
+ ret <4 x i1> %c
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+ br label %7
+
+5: ; preds = %7
+ %x = call double @double_func()
+ %a = call <16 x i8> @vector_func_16i8()
+ %b = call <16 x i8> @vector_func_16i8()
+ call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
+ ret i32 0
+
+7: ; preds = %7, %2
+ %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+ %seed_val = load i256, ptr @val
+ %9 = call i32 @seed(i256 %seed_val)
+ call void @hot_func(i32 %9)
+ %10 = add i32 %8, 1
+ %11 = icmp eq i32 %10, 100000
+ br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @seed(i256)
+declare double @double_func()
+declare <4 x i32> @vector_func()
+declare <16 x i8> @vector_func_16i8()
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 3, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll
new file mode 100644
index 0000000000000..d2c87b7b3fc14
--- /dev/null
+++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll
@@ -0,0 +1,141 @@
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from @cold_func first, two functions
+; without profiles secondly, and then @hot_func. Specifically, tests that
+; 1. If a constant is accessed by hot functions, all constant pools for this
+; constant (e.g., from an unprofiled function, or cold function) should have
+; .hot suffix.
+; 2. Similarly if a constant is accessed by both cold function and un-profiled
+; function, constant pools for this constant should not have .unlikely suffix.
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \
+; RUN: -unique-section-names=true \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=false -data-sections=false \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+;; For function @cold_func
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
+; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_1:
+; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7
+; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI0_2:
+; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125
+
+;; For function @unprofiled_func_double
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI1_0:
+; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125
+
+;; For function @unprofiled_func_float
+; CHECK: .section .rodata.cst4,"aM",@progbits,4
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI2_0:
+; CHECK-NEXT: .long 0x3e000000 # float 0.125
+
+;; For function @hot_func
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI3_0:
+; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005
+; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK-NEXT: .p2align
+; CHECK-NEXT: .LCPI3_1:
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+; CHECK-NEXT: .LCPI3_2:
+; CHECK-NEXT: .long 2147484090 # 0x800001ba
+; CHECK-NEXT: .long 2147483748 # 0x80000064
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+; CHECK-NEXT: .long 2147483648 # 0x80000000
+
+; CHECK: .section .rodata.cst32,"aM",@progbits,32
+; CHECK-NEXT: .globl val
+
+define double @cold_func(double %x) !prof !16 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %y = fmul double %x, 0x3EB0000000000000
+ %z = fmul double %y, 0x3fc0000000000000
+ ret double %z
+}
+
+define double @unprofiled_func_double(double %x) {
+ %z = fmul double %x, 0x3fc0000000000000
+ ret double %z
+}
+
+define float @unprofiled_func_float(float %x) {
+ %z = fmul float %x, 0x3fc0000000000000
+ ret float %z
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+ ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+ br label %7
+
+5: ; preds = %7
+ %x = call double @double_func()
+ call void @cold_func(double %x)
+ ret i32 0
+
+7: ; preds = %7, %2
+ %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+ %seed_val = load i256, ptr @val
+ %9 = call i32 @seed(i256 %seed_val)
+ call void @hot_func(i32 %9)
+ %10 = add i32 %8, 1
+ %11 = icmp eq i32 %10, 100000
+ br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @seed(i256)
+declare double @double_func()
+declare i32 @func_taking_arbitrary_param(...)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 1, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"branch_weights", i32 1, i32 99999}
More information about the llvm-commits
mailing list