[llvm] a244183 - [CodeLayout] cache-directed sort: limit max chain size (#69039)

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 22 16:50:07 PDT 2023


Author: Fangrui Song
Date: 2023-10-22T16:50:03-07:00
New Revision: a24418375a707b7314e7687888dd02eaefca42cd

URL: https://github.com/llvm/llvm-project/commit/a24418375a707b7314e7687888dd02eaefca42cd
DIFF: https://github.com/llvm/llvm-project/commit/a24418375a707b7314e7687888dd02eaefca42cd.diff

LOG: [CodeLayout] cache-directed sort: limit max chain size (#69039)

When linking a slightly larger executable,
ld.lld --call-graph-profile-sort=cdsort can be very slow (see #68638).
```
   4.6%  20.7Mi    .text.hot
   3.5%  15.9Mi    .text
   3.4%  15.2Mi    .text.unknown
```

Add cl option `cdsort-max-chain-size`, which is similar to
`ext-tsp-max-chain-size`, and set it to 128, to improve performance.

In `ld.lld @response.txt --threads=4 --call-graph-profile-sort=cdsort
--time-trace`
builds, the "Total Sort sections" time is measured as follows:

* -mllvm -cdsort-max-chain-size=64: 1.321813
* -mllvm -cdsort-max-chain-size=128: 2.030425
* -mllvm -cdsort-max-chain-size=256: 2.927684
* -mllvm -cdsort-max-chain-size=512: 5.493106
* unlimited: 9 minutes

The rest of the link takes 6.8s.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/CodeLayout.h
    llvm/lib/Transforms/Utils/CodeLayout.cpp
    llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
index f5127cff24af0df..9d550fae6dd0674 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
@@ -65,6 +65,8 @@ struct CDSortConfig {
   unsigned CacheEntries = 16;
   /// The size of a line in the cache.
   unsigned CacheSize = 2048;
+  /// The maximum size of a chain to create.
+  unsigned MaxChainSize = 128;
   /// The power exponent for the distance-based locality.
   double DistancePower = 0.25;
   /// The scale factor for the frequency-based locality.

diff  --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 6252c429205ab62..a6c9d2ac6cf2ffb 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -123,6 +123,10 @@ static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
 static cl::opt<unsigned> CacheSize("cds-cache-size", cl::ReallyHidden,
                                    cl::desc("The size of a line in the cache"));
 
+static cl::opt<unsigned>
+    CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
+                   cl::desc("The maximum size of a chain to create"));
+
 static cl::opt<double> DistancePower(
     "cds-distance-power", cl::ReallyHidden,
     cl::desc("The power exponent for the distance-based locality"));
@@ -1156,6 +1160,9 @@ class CDSortImpl {
         // Ignore loop edges.
         if (Edge->isSelfEdge())
           continue;
+        if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
+            Config.MaxChainSize)
+          continue;
 
         // Compute the gain of merging the two chains.
         MergeGainT Gain = getBestMergeGain(Edge);
@@ -1452,6 +1459,8 @@ std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
     Config.CacheEntries = CacheEntries;
   if (CacheSize.getNumOccurrences() > 0)
     Config.CacheSize = CacheSize;
+  if (CDMaxChainSize.getNumOccurrences() > 0)
+    Config.MaxChainSize = CDMaxChainSize;
   if (DistancePower.getNumOccurrences() > 0)
     Config.DistancePower = DistancePower;
   if (FrequencyScale.getNumOccurrences() > 0)

diff  --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
index ce42f703229bd01..ef9aa9a76342f4d 100644
--- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
@@ -40,6 +40,14 @@ TEST(CodeLayout, HotChain) {
     const std::vector<uint64_t> CallOffsets(std::size(Edges), 5);
     auto Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
     EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 2, 1}));
+
+    // -cdsort-max-chain-size disables forming a larger chain and therefore may
+    // change the result.
+    CDSortConfig Config;
+    Config.MaxChainSize = 3;
+    Order =
+        computeCacheDirectedLayout(Config, Sizes, Counts, Edges, CallOffsets);
+    EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
   }
 }
 


        


More information about the llvm-commits mailing list