[llvm] [GlobalMerge] Add MinSize feature to the GlobalMerge Pass. (PR #93686)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Wed May 29 06:51:05 PDT 2024


https://github.com/michaelmaitland created https://github.com/llvm/llvm-project/pull/93686

This patch adds an option that prevents the GlobalMerge pass from considering globals whose size is smaller than a given minimum (in bytes) for merging.

MinSize is chosen as follows, in order of precedence:
1. If global-merge-min-data-size is explicitly set, that value is used.
2. Otherwise, if the SmallDataLimit module flag is set and non-zero, SmallDataLimit + 1 is used.
3. Otherwise, 0 is used, which means globals of all sizes are considered for merging.

We found that this feature allowed us to see the benefit of the GlobalMerge pass while eliminating some merging that was not beneficial. This feature allowed us to enable the GlobalMerge pass on RISC-V in our downstream by default because it led to improvements on multiple benchmark suites.

I plan to post a separate patch to propose enabling this by default on RISC-V. But I do not want that discussion to be part of the discussion of adding this feature, so I am keeping the patches separate.

>From bae630cc7f79eb6b6f7984a15dd64f9d16589547 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 29 May 2024 06:28:15 -0700
Subject: [PATCH] [GlobalMerge] Add MinSize feature to the GlobalMerge Pass.

We add a feature that prevents the GlobalMerge pass from considering
data smaller than a minimum size in bytes for merging.

The MinSize is set in 3 ways:
1. If global-merge-min-data-size is explicitly set, then it uses that value.
2. If SmallDataLimit is set and non-zero, then SmallDataLimit + 1 is used.
3. Otherwise, 0 is used, which means all sizes are considered for merging.

This feature allowed us to enable the GlobalMerge pass on RISC-V in our
downstream by default because it led to improvements on multiple
benchmark suites without regressing the geomeans.

We found that this feature allowed us to see the benefit of the
GlobalMerge pass while eliminating some merging that was not beneficial.

I plan to post a separate patch to propose enabling this by default on
RISC-V. But I do not want that discussion to be part of the discussion
of adding this feature, so I am keeping the patches separate.
---
 llvm/include/llvm/CodeGen/GlobalMerge.h       |  2 +
 llvm/lib/CodeGen/GlobalMerge.cpp              | 22 ++++++++-
 .../global-merge-minsize-smalldata-nonzero.ll | 48 ++++++++++++++++++
 .../global-merge-minsize-smalldata-zero.ll    | 49 +++++++++++++++++++
 .../CodeGen/RISCV/global-merge-minsize.ll     | 40 +++++++++++++++
 5 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll
 create mode 100644 llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll
 create mode 100644 llvm/test/CodeGen/RISCV/global-merge-minsize.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalMerge.h b/llvm/include/llvm/CodeGen/GlobalMerge.h
index 6b3766ab9e024..13ad67d4544bc 100644
--- a/llvm/include/llvm/CodeGen/GlobalMerge.h
+++ b/llvm/include/llvm/CodeGen/GlobalMerge.h
@@ -21,6 +21,8 @@ struct GlobalMergeOptions {
   // functions), see the code that passes in the offset in the ARM backend
   // for more information.
   unsigned MaxOffset = 0;
+  // Minimum size in bytes of each global that should be considered in merging.
+  unsigned MinSize = 0;
   bool GroupByUse = true;
   bool IgnoreSingleUse = true;
   bool MergeConst = false;
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 545ee1741834a..c39555a9c18ae 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -134,6 +134,12 @@ static cl::opt<cl::boolOrDefault>
 EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
      cl::desc("Enable global merge pass on external linkage"));
 
+static cl::opt<unsigned>
+    GlobalMergeMinDataSize("global-merge-min-data-size",
+                           cl::desc("The minimum size in bytes of each global "
+                                    "that should considered in merging."),
+                           cl::init(0), cl::Hidden);
+
 STATISTIC(NumMerged, "Number of globals merged");
 
 namespace {
@@ -198,6 +204,19 @@ class GlobalMerge : public FunctionPass {
   }
 
   bool doInitialization(Module &M) override {
+    auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
+      Metadata *SDL = M.getModuleFlag("SmallDataLimit");
+      if (!SDL)
+        return std::nullopt;
+      return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
+    };
+    if (GlobalMergeMinDataSize.getNumOccurrences())
+      Opt.MinSize = GlobalMergeMinDataSize;
+    else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
+      Opt.MinSize = *SDL + 1;
+    else
+      Opt.MinSize = 0;
+
     GlobalMergeImpl P(TM, Opt);
     return P.run(M);
   }
@@ -670,7 +689,8 @@ bool GlobalMergeImpl::run(Module &M) {
       continue;
 
     Type *Ty = GV.getValueType();
-    if (DL.getTypeAllocSize(Ty) < Opt.MaxOffset) {
+    TypeSize AllocSize = DL.getTypeAllocSize(Ty);
+    if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
       if (TM &&
           TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
         BSSGlobals[{AddressSpace, Section}].push_back(&GV);
diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll
new file mode 100644
index 0000000000000..c547138930212
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-nonzero.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=SMALL-DATA
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -global-merge-min-data-size=0 \
+; RUN:    -verify-machineinstrs < %s | FileCheck %s -check-prefix=MINSIZE
+
+ at ig1 = internal global i32 0, align 4
+ at ig2 = internal global i32 0, align 4
+
+ at eg1 = dso_local global i32 0, align 4
+ at eg2 = dso_local global i32 0, align 4
+
+; This test shows that MinSize is set to SmallDataLimit + 1 when the
+; SmallDataLimit module flag is non-zero, and that global-merge-min-data-size
+; overrides the small data limit.
+
+define void @f1(i32 %a) nounwind {
+; SMALL-DATA-LABEL: f1:
+; SMALL-DATA:       # %bb.0:
+; SMALL-DATA-NEXT:    lui a1, %hi(ig1)
+; SMALL-DATA-NEXT:    sw a0, %lo(ig1)(a1)
+; SMALL-DATA-NEXT:    lui a1, %hi(ig2)
+; SMALL-DATA-NEXT:    sw a0, %lo(ig2)(a1)
+; SMALL-DATA-NEXT:    lui a1, %hi(eg1)
+; SMALL-DATA-NEXT:    sw a0, %lo(eg1)(a1)
+; SMALL-DATA-NEXT:    lui a1, %hi(eg2)
+; SMALL-DATA-NEXT:    sw a0, %lo(eg2)(a1)
+; SMALL-DATA-NEXT:    ret
+;
+; MINSIZE-LABEL: f1:
+; MINSIZE:       # %bb.0:
+; MINSIZE-NEXT:    lui a1, %hi(.L_MergedGlobals)
+; MINSIZE-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; MINSIZE-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; MINSIZE-NEXT:    sw a0, 4(a1)
+; MINSIZE-NEXT:    sw a0, 8(a1)
+; MINSIZE-NEXT:    sw a0, 12(a1)
+; MINSIZE-NEXT:    ret
+  store i32 %a, ptr @ig1, align 4
+  store i32 %a, ptr @ig2, align 4
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 8, !"SmallDataLimit", i32 8}
diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll
new file mode 100644
index 0000000000000..8e4d72af00ebc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/global-merge-minsize-smalldata-zero.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=SMALL-DATA
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -global-merge-min-data-size=5 \
+; RUN:    -verify-machineinstrs < %s | FileCheck %s -check-prefix=MINSIZE
+
+ at ig1 = internal global i32 0, align 4
+ at ig2 = internal global i32 0, align 4
+
+ at eg1 = dso_local global i32 0, align 4
+ at eg2 = dso_local global i32 0, align 4
+
+
+; This test shows that MinSize is set to 0 when the SmallDataLimit module
+; flag is set to zero, and that the global-merge-min-data-size option overrides
+; the small data limit.
+
+define void @f1(i32 %a) nounwind {
+; SMALL-DATA-LABEL: f1:
+; SMALL-DATA:       # %bb.0:
+; SMALL-DATA-NEXT:    lui a1, %hi(.L_MergedGlobals)
+; SMALL-DATA-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; SMALL-DATA-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; SMALL-DATA-NEXT:    sw a0, 4(a1)
+; SMALL-DATA-NEXT:    sw a0, 8(a1)
+; SMALL-DATA-NEXT:    sw a0, 12(a1)
+; SMALL-DATA-NEXT:    ret
+;
+; MINSIZE-LABEL: f1:
+; MINSIZE:       # %bb.0:
+; MINSIZE-NEXT:    lui a1, %hi(ig1)
+; MINSIZE-NEXT:    sw a0, %lo(ig1)(a1)
+; MINSIZE-NEXT:    lui a1, %hi(ig2)
+; MINSIZE-NEXT:    sw a0, %lo(ig2)(a1)
+; MINSIZE-NEXT:    lui a1, %hi(eg1)
+; MINSIZE-NEXT:    sw a0, %lo(eg1)(a1)
+; MINSIZE-NEXT:    lui a1, %hi(eg2)
+; MINSIZE-NEXT:    sw a0, %lo(eg2)(a1)
+; MINSIZE-NEXT:    ret
+  store i32 %a, ptr @ig1, align 4
+  store i32 %a, ptr @ig2, align 4
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 8, !"SmallDataLimit", i32 0}
diff --git a/llvm/test/CodeGen/RISCV/global-merge-minsize.ll b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll
new file mode 100644
index 0000000000000..e405425832acb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/global-merge-minsize.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -global-merge-min-data-size=5 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE
+
+ at ig1 = internal global i32 0, align 4
+ at ig2 = internal global i32 0, align 4
+
+ at eg1 = dso_local global i32 0, align 4
+ at eg2 = dso_local global i32 0, align 4
+
+define void @f1(i32 %a) nounwind {
+; RV32-LABEL: f1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.L_MergedGlobals)
+; RV32-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; RV32-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; RV32-NEXT:    sw a0, 4(a1)
+; RV32-NEXT:    sw a0, 8(a1)
+; RV32-NEXT:    sw a0, 12(a1)
+; RV32-NEXT:    ret
+;
+; RV32-MINSIZE-LABEL: f1:
+; RV32-MINSIZE:       # %bb.0:
+; RV32-MINSIZE-NEXT:    lui a1, %hi(ig1)
+; RV32-MINSIZE-NEXT:    sw a0, %lo(ig1)(a1)
+; RV32-MINSIZE-NEXT:    lui a1, %hi(ig2)
+; RV32-MINSIZE-NEXT:    sw a0, %lo(ig2)(a1)
+; RV32-MINSIZE-NEXT:    lui a1, %hi(eg1)
+; RV32-MINSIZE-NEXT:    sw a0, %lo(eg1)(a1)
+; RV32-MINSIZE-NEXT:    lui a1, %hi(eg2)
+; RV32-MINSIZE-NEXT:    sw a0, %lo(eg2)(a1)
+; RV32-MINSIZE-NEXT:    ret
+  store i32 %a, ptr @ig1, align 4
+  store i32 %a, ptr @ig2, align 4
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}



More information about the llvm-commits mailing list