[llvm] [CodeLayout] cache-directed sort: limit max chain size (PR #69039)

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 13:42:48 PDT 2023


https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/69039

>From ebde38a2356b405951ef1f0c4684fae221803887 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Fri, 13 Oct 2023 18:15:24 -0700
Subject: [PATCH 1/2] [CodeLayout] cache-directed sort: limit max chain size

When linking a slightly larger executable,
ld.lld --call-graph-profile-sort=cdsort can be very slow (see #68638).
```
   4.6%  20.7Mi    .text.hot
   3.5%  15.9Mi    .text
   3.4%  15.2Mi    .text.unknown
```

Add cl option `cdsort-max-chain-size`, which is similar to
`ext-tsp-max-chain-size`, and default it to 128 to improve performance.

In `ld.lld @response.txt --threads=4 --call-graph-profile-sort=cdsort --time-trace`
builds, the "Total Sort sections" time is measured as follows:

* -mllvm -cdsort-max-chain-size=64: 1.321813
* -mllvm -cdsort-max-chain-size=128: 2.030425
* -mllvm -cdsort-max-chain-size=256: 2.927684
* -mllvm -cdsort-max-chain-size=512: 5.493106
* unlimited: 9 minutes

The rest of the link takes 6.8s.
---
 llvm/lib/Transforms/Utils/CodeLayout.cpp           |  9 +++++++++
 llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 +++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 6252c429205ab62..f7f080af183bac3 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -62,6 +62,12 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
     "ext-tsp-apply-without-profile",
     cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
     cl::init(true), cl::Hidden);
+
+namespace codelayout {
+cl::opt<unsigned>
+    CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128),
+                   cl::desc("The maximum size of a chain to create"));
+}
 } // namespace llvm
 
 // Algorithm-specific params for Ext-TSP. The values are tuned for the best
@@ -1156,6 +1162,9 @@ class CDSortImpl {
         // Ignore loop edges.
         if (Edge->isSelfEdge())
           continue;
+        if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
+            CDMaxChainSize)
+          continue;
 
         // Compute the gain of merging the two chains.
         MergeGainT Gain = getBestMergeGain(Edge);
diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
index ce42f703229bd01..b6dcd03565bccbe 100644
--- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
@@ -1,4 +1,5 @@
 #include "llvm/Transforms/Utils/CodeLayout.h"
+#include "llvm/Support/CommandLine.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <vector>
@@ -7,6 +8,10 @@ using namespace llvm;
 using namespace llvm::codelayout;
 using testing::ElementsAreArray;
 
+namespace llvm::codelayout {
+extern cl::opt<unsigned> CDMaxChainSize;
+}
+
 namespace {
 TEST(CodeLayout, ThreeFunctions) {
   // Place the most likely successor (2) first.
@@ -40,6 +45,14 @@ TEST(CodeLayout, HotChain) {
     const std::vector<uint64_t> CallOffsets(std::size(Edges), 5);
     auto Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
     EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 2, 1}));
+
+    // -cdsort-max-chain-size disables forming a larger chain and therefore may
+    // change the result.
+    unsigned Saved = CDMaxChainSize;
+    CDMaxChainSize.setValue(3);
+    Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
+    EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
+    CDMaxChainSize.setValue(Saved);
   }
 }
 

>From 8a47e7cd3a79a6c56b8fb64701287ce8405e8b9f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 19 Oct 2023 13:20:01 -0700
Subject: [PATCH 2/2] Add MaxChainSize to CDSortConfig

---
 llvm/include/llvm/Transforms/Utils/CodeLayout.h    |  2 ++
 llvm/lib/Transforms/Utils/CodeLayout.cpp           | 14 +++++++-------
 llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 ++++---------
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
index f5127cff24af0df..9d550fae6dd0674 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
@@ -65,6 +65,8 @@ struct CDSortConfig {
   unsigned CacheEntries = 16;
   /// The size of a line in the cache.
   unsigned CacheSize = 2048;
+  /// The maximum size of a chain to create.
+  unsigned MaxChainSize = 128;
   /// The power exponent for the distance-based locality.
   double DistancePower = 0.25;
   /// The scale factor for the frequency-based locality.
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index f7f080af183bac3..a6c9d2ac6cf2ffb 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -62,12 +62,6 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
     "ext-tsp-apply-without-profile",
     cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
     cl::init(true), cl::Hidden);
-
-namespace codelayout {
-cl::opt<unsigned>
-    CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128),
-                   cl::desc("The maximum size of a chain to create"));
-}
 } // namespace llvm
 
 // Algorithm-specific params for Ext-TSP. The values are tuned for the best
@@ -129,6 +123,10 @@ static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
 static cl::opt<unsigned> CacheSize("cds-cache-size", cl::ReallyHidden,
                                    cl::desc("The size of a line in the cache"));
 
+static cl::opt<unsigned>
+    CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
+                   cl::desc("The maximum size of a chain to create"));
+
 static cl::opt<double> DistancePower(
     "cds-distance-power", cl::ReallyHidden,
     cl::desc("The power exponent for the distance-based locality"));
@@ -1163,7 +1161,7 @@ class CDSortImpl {
         if (Edge->isSelfEdge())
           continue;
         if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
-            CDMaxChainSize)
+            Config.MaxChainSize)
           continue;
 
         // Compute the gain of merging the two chains.
@@ -1461,6 +1459,8 @@ std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
     Config.CacheEntries = CacheEntries;
   if (CacheSize.getNumOccurrences() > 0)
     Config.CacheSize = CacheSize;
+  if (CDMaxChainSize.getNumOccurrences() > 0)
+    Config.MaxChainSize = CDMaxChainSize;
   if (DistancePower.getNumOccurrences() > 0)
     Config.DistancePower = DistancePower;
   if (FrequencyScale.getNumOccurrences() > 0)
diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
index b6dcd03565bccbe..ef9aa9a76342f4d 100644
--- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
@@ -1,5 +1,4 @@
 #include "llvm/Transforms/Utils/CodeLayout.h"
-#include "llvm/Support/CommandLine.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <vector>
@@ -8,10 +7,6 @@ using namespace llvm;
 using namespace llvm::codelayout;
 using testing::ElementsAreArray;
 
-namespace llvm::codelayout {
-extern cl::opt<unsigned> CDMaxChainSize;
-}
-
 namespace {
 TEST(CodeLayout, ThreeFunctions) {
   // Place the most likely successor (2) first.
@@ -48,11 +43,11 @@ TEST(CodeLayout, HotChain) {
 
     // -cdsort-max-chain-size disables forming a larger chain and therefore may
     // change the result.
-    unsigned Saved = CDMaxChainSize;
-    CDMaxChainSize.setValue(3);
-    Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
+    CDSortConfig Config;
+    Config.MaxChainSize = 3;
+    Order =
+        computeCacheDirectedLayout(Config, Sizes, Counts, Edges, CallOffsets);
     EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
-    CDMaxChainSize.setValue(Saved);
   }
 }
 



More information about the llvm-commits mailing list