[llvm] [PowerPC][GlobalMerge] Reduce TOC usage by merging internal and private global data (PR #101224)

Amy Kwan via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 13 21:21:16 PDT 2024


https://github.com/amy-kwan updated https://github.com/llvm/llvm-project/pull/101224

>From a98f7699356b768a9f46649ee2eaec51570b43f1 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Tue, 30 Jul 2024 12:53:15 -0500
Subject: [PATCH 1/3] [PowerPC][GlobalMerge] Reduce TOC usage by merging
 internal and private global data

This patch aims to reduce TOC usage by merging internal and private global data.

Moreover, we also add the GlobalMerge pass within the PPCTargetMachine pipeline,
which is disabled by default. This transformation can be enabled by -ppc-global-merge.
---
 llvm/include/llvm/CodeGen/GlobalMerge.h      |  4 +++
 llvm/include/llvm/CodeGen/Passes.h           |  4 ++-
 llvm/lib/CodeGen/GlobalMerge.cpp             | 27 ++++++++++++++++----
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 13 ++++++++++
 llvm/test/CodeGen/PowerPC/merge-private.ll   | 20 +++++++--------
 5 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h b/llvm/include/llvm/CodeGen/GlobalMerge.h
index 13ad67d4544bc7..ef767d548dc6eb 100644
--- a/llvm/include/llvm/CodeGen/GlobalMerge.h
+++ b/llvm/include/llvm/CodeGen/GlobalMerge.h
@@ -28,6 +28,10 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
+  /// Whether we should merge global variables that have private linkage.
+  bool MergePrivateGlobals = false;
+  /// Whether we should merge constant global variables.
+  bool MergeConstantGlobals = false;
   /// Whether we should try to optimize for size only.
   /// Currently, this applies a dead simple heuristic: only consider globals
   /// used in minsize functions for merging.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 20273d069bf053..df67d35cfdd2c0 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -476,7 +476,9 @@ namespace llvm {
   ///
   Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
                               bool OnlyOptimizeForSize = false,
-                              bool MergeExternalByDefault = false);
+                              bool MergeExternalByDefault = false,
+                              bool MergePrivateByDefault = false,
+                              bool MergeConstantByDefault = false);
 
   /// This pass splits the stack into a safe stack and an unsafe stack to
   /// protect against stack-based overflow vulnerabilities.
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 8aa4345cfd6df6..46bda37ea45a53 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -196,11 +196,14 @@ class GlobalMerge : public FunctionPass {
   }
 
   explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
-                       bool OnlyOptimizeForSize, bool MergeExternalGlobals)
+                       bool OnlyOptimizeForSize, bool MergeExternalGlobals,
+                       bool MergePrivateGlobals, bool MergeConstantGlobals)
       : FunctionPass(ID), TM(TM) {
     Opt.MaxOffset = MaximalOffset;
     Opt.SizeOnly = OnlyOptimizeForSize;
     Opt.MergeExternal = MergeExternalGlobals;
+    Opt.MergePrivateGlobals = MergePrivateGlobals;
+    Opt.MergeConstantGlobals = MergeConstantGlobals;
     initializeGlobalMergePass(*PassRegistry::getPassRegistry());
   }
 
@@ -475,7 +478,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
   auto &DL = M.getDataLayout();
 
   LLVM_DEBUG(dbgs() << " Trying to merge set, starts with #"
-                    << GlobalSet.find_first() << "\n");
+                    << GlobalSet.find_first() << ", total of " << Globals.size()
+                    << "\n");
 
   bool Changed = false;
   ssize_t i = GlobalSet.find_first();
@@ -551,6 +555,8 @@ bool GlobalMergeImpl::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
     MergedGV->setAlignment(MaxAlign);
     MergedGV->setSection(Globals[i]->getSection());
 
+    LLVM_DEBUG(dbgs() << "MergedGV:  " << *MergedGV << "\n");
+
     const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
     for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
       GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
@@ -700,6 +706,11 @@ bool GlobalMergeImpl::run(Module &M) {
       else
         Globals[{AddressSpace, Section}].push_back(&GV);
     }
+    LLVM_DEBUG(dbgs() << "GV "
+                      << ((DL.getTypeAllocSize(Ty) < Opt.MaxOffset)
+                              ? "to merge: "
+                              : "not to merge: ")
+                      << GV << "\n");
   }
 
   for (auto &P : Globals)
@@ -710,7 +721,7 @@ bool GlobalMergeImpl::run(Module &M) {
     if (P.second.size() > 1)
       Changed |= doMerge(P.second, M, false, P.first.first);
 
-  if (EnableGlobalMergeOnConst)
+  if (Opt.MergeConstantGlobals)
     for (auto &P : ConstGlobals)
       if (P.second.size() > 1)
         Changed |= doMerge(P.second, M, true, P.first.first);
@@ -720,8 +731,14 @@ bool GlobalMergeImpl::run(Module &M) {
 
 Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
                                   bool OnlyOptimizeForSize,
-                                  bool MergeExternalByDefault) {
+                                  bool MergeExternalByDefault,
+                                  bool MergePrivateByDefault,
+                                  bool MergeConstantByDefault) {
   bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
     MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
-  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal);
+  bool MergeConstant = EnableGlobalMergeOnConst.getNumOccurrences() > 0
+                           ? EnableGlobalMergeOnConst
+                           : MergeConstantByDefault;
+  return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
+                         MergePrivateByDefault, MergeConstant);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1ef891d1b677a2..e4045ec3044355 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -111,6 +111,15 @@ static cl::opt<bool> EnablePPCGenScalarMASSEntries(
              "(scalar) entries"),
     cl::Hidden);
 
+static cl::opt<bool>
+    EnableGlobalMerge("ppc-global-merge", cl::Hidden, cl::init(false),
+                      cl::desc("Enable the global merge pass"));
+
+static cl::opt<unsigned>
+    GlobalMergeMaxOffset("ppc-global-merge-max-offset", cl::Hidden,
+                         cl::init(0x7fff),
+                         cl::desc("Maximum global merge offset"));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -491,6 +500,10 @@ void PPCPassConfig::addIRPasses() {
 }
 
 bool PPCPassConfig::addPreISel() {
+  if (EnableGlobalMerge)
+    addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true,
+                                  true));
+
   if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)
     addPass(createPPCMergeStringPoolPass());
 
diff --git a/llvm/test/CodeGen/PowerPC/merge-private.ll b/llvm/test/CodeGen/PowerPC/merge-private.ll
index 6cf276990d7ea2..6ed2d6dfc542b7 100644
--- a/llvm/test/CodeGen/PowerPC/merge-private.ll
+++ b/llvm/test/CodeGen/PowerPC/merge-private.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
-; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN:     --check-prefix=AIX64
 ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \
-; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN:     --check-prefix=AIX32
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux -mcpu=pwr8 \
-; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN:     --check-prefix=LINUX64LE
 ; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux -mcpu=pwr8 \
-; RUN:     -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:     -ppc-asm-full-reg-names -ppc-global-merge=true < %s | FileCheck %s \
 ; RUN:     --check-prefix=LINUX64BE
 
 @.str = private unnamed_addr constant [15 x i8] c"Private global\00", align 1
@@ -24,7 +24,7 @@ define dso_local void @print_func() {
 ; AIX64-NEXT:    stdu r1, -128(r1)
 ; AIX64-NEXT:    std r0, 144(r1)
 ; AIX64-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
-; AIX64-NEXT:    ld r31, L..C0(r2) # @__ModuleStringPool
+; AIX64-NEXT:    ld r31, L..C0(r2) # @_MergedGlobals
 ; AIX64-NEXT:    mr r3, r31
 ; AIX64-NEXT:    bl .puts[PR]
 ; AIX64-NEXT:    nop
@@ -43,7 +43,7 @@ define dso_local void @print_func() {
 ; AIX32-NEXT:    stwu r1, -64(r1)
 ; AIX32-NEXT:    stw r0, 72(r1)
 ; AIX32-NEXT:    stw r31, 60(r1) # 4-byte Folded Spill
-; AIX32-NEXT:    lwz r31, L..C0(r2) # @__ModuleStringPool
+; AIX32-NEXT:    lwz r31, L..C0(r2) # @_MergedGlobals
 ; AIX32-NEXT:    mr r3, r31
 ; AIX32-NEXT:    bl .puts[PR]
 ; AIX32-NEXT:    nop
@@ -64,9 +64,9 @@ define dso_local void @print_func() {
 ; LINUX64LE-NEXT:    .cfi_offset r30, -16
 ; LINUX64LE-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; LINUX64LE-NEXT:    stdu r1, -48(r1)
-; LINUX64LE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
+; LINUX64LE-NEXT:    addis r3, r2, .L_MergedGlobals@toc@ha
 ; LINUX64LE-NEXT:    std r0, 64(r1)
-; LINUX64LE-NEXT:    addi r30, r3, .L__ModuleStringPool@toc@l
+; LINUX64LE-NEXT:    addi r30, r3, .L_MergedGlobals@toc@l
 ; LINUX64LE-NEXT:    mr r3, r30
 ; LINUX64LE-NEXT:    bl puts
 ; LINUX64LE-NEXT:    nop
@@ -87,9 +87,9 @@ define dso_local void @print_func() {
 ; LINUX64BE-NEXT:    .cfi_def_cfa_offset 128
 ; LINUX64BE-NEXT:    .cfi_offset lr, 16
 ; LINUX64BE-NEXT:    .cfi_offset r30, -16
-; LINUX64BE-NEXT:    addis r3, r2, .L__ModuleStringPool@toc@ha
+; LINUX64BE-NEXT:    addis r3, r2, .L_MergedGlobals@toc@ha
 ; LINUX64BE-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; LINUX64BE-NEXT:    addi r30, r3, .L__ModuleStringPool@toc@l
+; LINUX64BE-NEXT:    addi r30, r3, .L_MergedGlobals@toc@l
 ; LINUX64BE-NEXT:    mr r3, r30
 ; LINUX64BE-NEXT:    bl puts
 ; LINUX64BE-NEXT:    nop

>From 2b3b0063a37ff54d671d81f3e8fc47faf1f2c12a Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Thu, 1 Aug 2024 12:58:44 -0500
Subject: [PATCH 2/3] Remove unused MergePrivateGlobals

---
 llvm/include/llvm/CodeGen/GlobalMerge.h      |  2 --
 llvm/include/llvm/CodeGen/Passes.h           |  1 -
 llvm/lib/CodeGen/GlobalMerge.cpp             | 10 +++-------
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp |  2 +-
 4 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h b/llvm/include/llvm/CodeGen/GlobalMerge.h
index ef767d548dc6eb..1577bcf8903f52 100644
--- a/llvm/include/llvm/CodeGen/GlobalMerge.h
+++ b/llvm/include/llvm/CodeGen/GlobalMerge.h
@@ -28,8 +28,6 @@ struct GlobalMergeOptions {
   bool MergeConst = false;
   /// Whether we should merge global variables that have external linkage.
   bool MergeExternal = true;
-  /// Whether we should merge global variables that have private linkage.
-  bool MergePrivateGlobals = false;
   /// Whether we should merge constant global variables.
   bool MergeConstantGlobals = false;
   /// Whether we should try to optimize for size only.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index df67d35cfdd2c0..c7c2178571215b 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -477,7 +477,6 @@ namespace llvm {
   Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset,
                               bool OnlyOptimizeForSize = false,
                               bool MergeExternalByDefault = false,
-                              bool MergePrivateByDefault = false,
                               bool MergeConstantByDefault = false);
 
   /// This pass splits the stack into a safe stack and an unsafe stack to
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 46bda37ea45a53..c31ba6b31ad9ac 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -197,12 +197,11 @@ class GlobalMerge : public FunctionPass {
 
   explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset,
                        bool OnlyOptimizeForSize, bool MergeExternalGlobals,
-                       bool MergePrivateGlobals, bool MergeConstantGlobals)
+                       bool MergeConstantGlobals)
       : FunctionPass(ID), TM(TM) {
     Opt.MaxOffset = MaximalOffset;
     Opt.SizeOnly = OnlyOptimizeForSize;
     Opt.MergeExternal = MergeExternalGlobals;
-    Opt.MergePrivateGlobals = MergePrivateGlobals;
     Opt.MergeConstantGlobals = MergeConstantGlobals;
     initializeGlobalMergePass(*PassRegistry::getPassRegistry());
   }
@@ -732,13 +731,10 @@ bool GlobalMergeImpl::run(Module &M) {
 Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
                                   bool OnlyOptimizeForSize,
                                   bool MergeExternalByDefault,
-                                  bool MergePrivateByDefault,
                                   bool MergeConstantByDefault) {
   bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ?
     MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE);
-  bool MergeConstant = EnableGlobalMergeOnConst.getNumOccurrences() > 0
-                           ? EnableGlobalMergeOnConst
-                           : MergeConstantByDefault;
+  bool MergeConstant = EnableGlobalMergeOnConst || MergeConstantByDefault;
   return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
-                         MergePrivateByDefault, MergeConstant);
+                         MergeConstant);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index e4045ec3044355..6a502230482816 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -501,7 +501,7 @@ void PPCPassConfig::addIRPasses() {
 
 bool PPCPassConfig::addPreISel() {
   if (EnableGlobalMerge)
-    addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true,
+    addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false,
                                   true));
 
   if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)

>From 40e0221120f1884d7c6d429fadb3671b18b8ac93 Mon Sep 17 00:00:00 2001
From: Amy Kwan <amy.kwan1 at ibm.com>
Date: Tue, 13 Aug 2024 23:21:00 -0500
Subject: [PATCH 3/3] Address clang-format comment

---
 llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 6a502230482816..763b6edb1c09fb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -501,8 +501,8 @@ void PPCPassConfig::addIRPasses() {
 
 bool PPCPassConfig::addPreISel() {
   if (EnableGlobalMerge)
-    addPass(createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false,
-                                  true));
+    addPass(
+        createGlobalMergePass(TM, GlobalMergeMaxOffset, false, false, true));
 
   if (MergeStringPool && getOptLevel() != CodeGenOptLevel::None)
     addPass(createPPCMergeStringPoolPass());



More information about the llvm-commits mailing list