[llvm] [CodeLayout] cache-directed sort: limit max chain size (PR #69039)
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 13:42:48 PDT 2023
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/69039
>From ebde38a2356b405951ef1f0c4684fae221803887 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Fri, 13 Oct 2023 18:15:24 -0700
Subject: [PATCH 1/2] [CodeLayout] cache-directed sort: limit max chain size
When linking a somewhat larger executable (section sizes shown below),
ld.lld --call-graph-profile-sort=cdsort can be very slow (see #68638).
```
4.6% 20.7Mi .text.hot
3.5% 15.9Mi .text
3.4% 15.2Mi .text.unknown
```
Add cl option `cdsort-max-chain-size`, which is similar to
`ext-tsp-max-chain-size`, and set it to 128, to improve performance.
In `ld.lld @response.txt --threads=4 --call-graph-profile-sort=cdsort --time-trace`
builds, the "Total Sort sections" time is measured as follows:
* -mllvm -cdsort-max-chain-size=64: 1.321813
* -mllvm -cdsort-max-chain-size=128: 2.030425
* -mllvm -cdsort-max-chain-size=256: 2.927684
* -mllvm -cdsort-max-chain-size=512: 5.493106
* unlimited: 9 minutes
The remaining part takes 6.8s.
---
llvm/lib/Transforms/Utils/CodeLayout.cpp | 9 +++++++++
llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 +++++++++++++
2 files changed, 22 insertions(+)
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 6252c429205ab62..f7f080af183bac3 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -62,6 +62,12 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
"ext-tsp-apply-without-profile",
cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
cl::init(true), cl::Hidden);
+
+namespace codelayout {
+cl::opt<unsigned>
+ CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128),
+ cl::desc("The maximum size of a chain to create"));
+}
} // namespace llvm
// Algorithm-specific params for Ext-TSP. The values are tuned for the best
@@ -1156,6 +1162,9 @@ class CDSortImpl {
// Ignore loop edges.
if (Edge->isSelfEdge())
continue;
+ if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
+ CDMaxChainSize)
+ continue;
// Compute the gain of merging the two chains.
MergeGainT Gain = getBestMergeGain(Edge);
diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
index ce42f703229bd01..b6dcd03565bccbe 100644
--- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
@@ -1,4 +1,5 @@
#include "llvm/Transforms/Utils/CodeLayout.h"
+#include "llvm/Support/CommandLine.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <vector>
@@ -7,6 +8,10 @@ using namespace llvm;
using namespace llvm::codelayout;
using testing::ElementsAreArray;
+namespace llvm::codelayout {
+extern cl::opt<unsigned> CDMaxChainSize;
+}
+
namespace {
TEST(CodeLayout, ThreeFunctions) {
// Place the most likely successor (2) first.
@@ -40,6 +45,14 @@ TEST(CodeLayout, HotChain) {
const std::vector<uint64_t> CallOffsets(std::size(Edges), 5);
auto Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 2, 1}));
+
+ // -cdsort-max-chain-size disables forming a larger chain and therefore may
+ // change the result.
+ unsigned Saved = CDMaxChainSize;
+ CDMaxChainSize.setValue(3);
+ Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
+ EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
+ CDMaxChainSize.setValue(Saved);
}
}
>From 8a47e7cd3a79a6c56b8fb64701287ce8405e8b9f Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 19 Oct 2023 13:20:01 -0700
Subject: [PATCH 2/2] Add MaxChainSize to CDSortConfig
---
llvm/include/llvm/Transforms/Utils/CodeLayout.h | 2 ++
llvm/lib/Transforms/Utils/CodeLayout.cpp | 14 +++++++-------
llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp | 13 ++++---------
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/CodeLayout.h b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
index f5127cff24af0df..9d550fae6dd0674 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeLayout.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeLayout.h
@@ -65,6 +65,8 @@ struct CDSortConfig {
unsigned CacheEntries = 16;
/// The size of a line in the cache.
unsigned CacheSize = 2048;
+ /// The maximum size of a chain to create.
+ unsigned MaxChainSize = 128;
/// The power exponent for the distance-based locality.
double DistancePower = 0.25;
/// The scale factor for the frequency-based locality.
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index f7f080af183bac3..a6c9d2ac6cf2ffb 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -62,12 +62,6 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
"ext-tsp-apply-without-profile",
cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
cl::init(true), cl::Hidden);
-
-namespace codelayout {
-cl::opt<unsigned>
- CDMaxChainSize("cdsort-max-chain-size", cl::Hidden, cl::init(128),
- cl::desc("The maximum size of a chain to create"));
-}
} // namespace llvm
// Algorithm-specific params for Ext-TSP. The values are tuned for the best
@@ -129,6 +123,10 @@ static cl::opt<unsigned> CacheEntries("cds-cache-entries", cl::ReallyHidden,
static cl::opt<unsigned> CacheSize("cds-cache-size", cl::ReallyHidden,
cl::desc("The size of a line in the cache"));
+static cl::opt<unsigned>
+ CDMaxChainSize("cdsort-max-chain-size", cl::ReallyHidden,
+ cl::desc("The maximum size of a chain to create"));
+
static cl::opt<double> DistancePower(
"cds-distance-power", cl::ReallyHidden,
cl::desc("The power exponent for the distance-based locality"));
@@ -1163,7 +1161,7 @@ class CDSortImpl {
if (Edge->isSelfEdge())
continue;
if (Edge->srcChain()->numBlocks() + Edge->dstChain()->numBlocks() >
- CDMaxChainSize)
+ Config.MaxChainSize)
continue;
// Compute the gain of merging the two chains.
@@ -1461,6 +1459,8 @@ std::vector<uint64_t> codelayout::computeCacheDirectedLayout(
Config.CacheEntries = CacheEntries;
if (CacheSize.getNumOccurrences() > 0)
Config.CacheSize = CacheSize;
+ if (CDMaxChainSize.getNumOccurrences() > 0)
+ Config.MaxChainSize = CDMaxChainSize;
if (DistancePower.getNumOccurrences() > 0)
Config.DistancePower = DistancePower;
if (FrequencyScale.getNumOccurrences() > 0)
diff --git a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
index b6dcd03565bccbe..ef9aa9a76342f4d 100644
--- a/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeLayoutTest.cpp
@@ -1,5 +1,4 @@
#include "llvm/Transforms/Utils/CodeLayout.h"
-#include "llvm/Support/CommandLine.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <vector>
@@ -8,10 +7,6 @@ using namespace llvm;
using namespace llvm::codelayout;
using testing::ElementsAreArray;
-namespace llvm::codelayout {
-extern cl::opt<unsigned> CDMaxChainSize;
-}
-
namespace {
TEST(CodeLayout, ThreeFunctions) {
// Place the most likely successor (2) first.
@@ -48,11 +43,11 @@ TEST(CodeLayout, HotChain) {
// -cdsort-max-chain-size disables forming a larger chain and therefore may
// change the result.
- unsigned Saved = CDMaxChainSize;
- CDMaxChainSize.setValue(3);
- Order = computeCacheDirectedLayout(Sizes, Counts, Edges, CallOffsets);
+ CDSortConfig Config;
+ Config.MaxChainSize = 3;
+ Order =
+ computeCacheDirectedLayout(Config, Sizes, Counts, Edges, CallOffsets);
EXPECT_THAT(Order, ElementsAreArray({0, 3, 4, 1, 2}));
- CDMaxChainSize.setValue(Saved);
}
}
More information about the llvm-commits
mailing list