[llvm] [LTO] Introduce a new class ImportIDTable (PR #106503)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 00:40:09 PDT 2024
https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/106503
The new class implements a deduplication table to convert import list
elements:
{SourceModule, GUID, Definition/Declaration}
into 32-bit integers, and vice versa. This patch adds a unit test but
does not add a use yet.
To be precise, the deduplication table holds {SourceModule, GUID}
pairs. We use the bottom one bit of the 32-bit integers to indicate
whether we have a definition or declaration.
A subsequent patch will collapse the import list hierarchy --
FunctionsToImportTy holding many instances of FunctionsToImportTy --
down to DenseSet<uint32_t> with each element indexing into the
deduplication table above. This will address multiple sources of
space inefficiency.
>From ee3aed40b1345f50323dce3804bf2d861527e998 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Tue, 27 Aug 2024 08:39:25 -0700
Subject: [PATCH] [LTO] Introduce a new class ImportIDTable
The new class implements a deduplication table to convert import list
elements:
{SourceModule, GUID, Definition/Declaration}
into 32-bit integers, and vice versa. This patch adds a unit test but
does not add a use yet.
To be precise, the deduplication table holds {SourceModule, GUID}
pairs. We use the bottom one bit of the 32-bit integers to indicate
whether we have a definition or declaration.
A subsequent patch will collapse the import list hierarchy --
FunctionsToImportTy holding many instances of FunctionsToImportTy --
down to DenseSet<uint32_t> with each element indexing into the
deduplication table above. This will address multiple sources of
space inefficiency.
---
.../llvm/Transforms/IPO/FunctionImport.h | 71 +++++++++++++++++
llvm/unittests/Transforms/IPO/CMakeLists.txt | 1 +
.../Transforms/IPO/ImportIDTableTests.cpp | 79 +++++++++++++++++++
3 files changed, 151 insertions(+)
create mode 100644 llvm/unittests/Transforms/IPO/ImportIDTableTests.cpp
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index b7280c56be9cc8..99fe110191dec9 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@@ -96,6 +97,76 @@ class FunctionImporter {
std::tuple<unsigned, const GlobalValueSummary *,
std::unique_ptr<ImportFailureInfo>>>;
+ // Issues import IDs. Each ID uniquely corresponds to a tuple of
+ // (FromModule, GUID, Definition/Declaration).
+ //
+ // The import IDs make the import list space efficient by referring to each
+ // import with a 32-bit integer ID while maintaining a central table that maps
+ // those integer IDs to tuples of (FromModule, GUID, Def/Decl).
+ //
+ // In one large application, a pair of (FromModule, GUID) is mentioned in
+ // import lists more than 50 times on average across all destination modules.
+ // Mentioning the 32-byte tuple:
+ //
+ // std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind>
+ //
+ // 50 times by value in various import lists would be costly. We can reduce
+ // the memory footprint of import lists by placing one copy in a central table
+ // and referring to it with 32-bit integer IDs.
+ //
+ // To save space within the central table, we only store pairs of
+ // (FromModule, GUID) in the central table. In the actual 32-bit integer ID,
+ // the top 31 bits index into the central table while the bottom 1 bit
+ // indicates whether an ID is for GlobalValueSummary::Declaration or
+ // GlobalValueSummary::Definition.
+ class ImportIDTable {
+ public:
+ using ImportIDTy = uint32_t;
+
+ // Create a pair of import IDs [Def, Decl] for a given pair of FromModule
+ // and GUID.
+ std::pair<ImportIDTy, ImportIDTy> createImportIDs(StringRef FromModule,
+ GlobalValue::GUID GUID) {
+ auto Key = std::make_pair(FromModule, GUID);
+ auto InsertResult = TheTable.try_emplace(Key, TheTable.size());
+ return makeIDPair(InsertResult.first->second);
+ }
+
+ // Get a pair of previously created import IDs [Def, Decl] for a given pair
+ // of FromModule and GUID. Returns std::nullopt if not available.
+ std::optional<std::pair<ImportIDTy, ImportIDTy>>
+ getImportIDs(StringRef FromModule, GlobalValue::GUID GUID) {
+ auto Key = std::make_pair(FromModule, GUID);
+ auto It = TheTable.find(Key);
+ if (It != TheTable.end())
+ return makeIDPair(It->second);
+ return std::nullopt;
+ }
+
+ // Return a tuple of [FromModule, GUID, Def/Decl] that a given ImportID
+ // corresponds to.
+ std::tuple<StringRef, GlobalValue::GUID, GlobalValueSummary::ImportKind>
+ lookup(ImportIDTy ImportID) const {
+ GlobalValueSummary::ImportKind Kind =
+ (ImportID & 1) ? GlobalValueSummary::Declaration
+ : GlobalValueSummary::Definition;
+ auto It = TheTable.begin() + (ImportID >> 1);
+ StringRef FromModule = It->first.first;
+ GlobalValue::GUID GUID = It->first.second;
+ return std::make_tuple(FromModule, GUID, Kind);
+ }
+
+ private:
+ // Make a pair of import IDs [Def, Decl] from an index into TheTable.
+ static std::pair<ImportIDTy, ImportIDTy> makeIDPair(ImportIDTy Index) {
+ ImportIDTy Def = Index << 1;
+ ImportIDTy Decl = Def | 1;
+ return std::make_pair(Def, Decl);
+ }
+
+ MapVector<std::pair<StringRef, GlobalValue::GUID>, ImportIDTy> TheTable;
+ };
+
/// The map maintains the list of imports. Conceptually, it is a collection
/// of tuples of the form:
///
diff --git a/llvm/unittests/Transforms/IPO/CMakeLists.txt b/llvm/unittests/Transforms/IPO/CMakeLists.txt
index ac632450d57305..65be2966bec049 100644
--- a/llvm/unittests/Transforms/IPO/CMakeLists.txt
+++ b/llvm/unittests/Transforms/IPO/CMakeLists.txt
@@ -13,4 +13,5 @@ add_llvm_unittest(IPOTests
WholeProgramDevirt.cpp
AttributorTest.cpp
FunctionSpecializationTest.cpp
+ ImportIDTableTests.cpp
)
diff --git a/llvm/unittests/Transforms/IPO/ImportIDTableTests.cpp b/llvm/unittests/Transforms/IPO/ImportIDTableTests.cpp
new file mode 100644
index 00000000000000..7554ee59c6869a
--- /dev/null
+++ b/llvm/unittests/Transforms/IPO/ImportIDTableTests.cpp
@@ -0,0 +1,79 @@
+//===- ImportIDTableTests.cpp - Unit tests for ImportIDTable --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <set>
+#include <type_traits>
+
+using namespace llvm;
+
+TEST(ImportIDTableTests, Basic) {
+ FunctionImporter::ImportIDTable Table;
+
+ auto [Def, Decl] = Table.createImportIDs("mod", 123U);
+ auto [Def2, Decl2] = Table.createImportIDs("stuff", 456U);
+
+ // Def and Decl must be of the same unsigned integer type.
+ static_assert(
+ std::is_unsigned_v<FunctionImporter::ImportIDTable::ImportIDTy>);
+ static_assert(std::is_same_v<FunctionImporter::ImportIDTable::ImportIDTy,
+ decltype(Def)>);
+ static_assert(std::is_same_v<FunctionImporter::ImportIDTable::ImportIDTy,
+ decltype(Decl)>);
+
+ // Check that all IDs are unique.
+ std::set<FunctionImporter::ImportIDTable::ImportIDTy> IDs = {Def, Decl, Def2,
+ Decl2};
+ EXPECT_THAT(IDs, ::testing::SizeIs(4));
+
+ // Verify what Def maps to.
+ auto DefTuple = Table.lookup(Def);
+ EXPECT_EQ(std::get<0>(DefTuple), StringRef("mod"));
+ EXPECT_EQ(std::get<1>(DefTuple), 123U);
+ EXPECT_EQ(std::get<2>(DefTuple), GlobalValueSummary::Definition);
+
+ // Verify what Def maps to.
+ auto DeclTuple = Table.lookup(Decl);
+ EXPECT_EQ(std::get<0>(DeclTuple), StringRef("mod"));
+ EXPECT_EQ(std::get<1>(DeclTuple), 123U);
+ EXPECT_EQ(std::get<2>(DeclTuple), GlobalValueSummary::Declaration);
+
+ // Verify what Def2 maps to.
+ auto Def2Tuple = Table.lookup(Def2);
+ EXPECT_EQ(std::get<0>(Def2Tuple), StringRef("stuff"));
+ EXPECT_EQ(std::get<1>(Def2Tuple), 456U);
+ EXPECT_EQ(std::get<2>(Def2Tuple), GlobalValueSummary::Definition);
+
+ // Verify what Def2 maps to.
+ auto Decl2Tuple = Table.lookup(Decl2);
+ EXPECT_EQ(std::get<0>(Decl2Tuple), StringRef("stuff"));
+ EXPECT_EQ(std::get<1>(Decl2Tuple), 456U);
+ EXPECT_EQ(std::get<2>(Decl2Tuple), GlobalValueSummary::Declaration);
+}
+
+TEST(ImportIDTableTests, Duplicates) {
+ FunctionImporter::ImportIDTable Table;
+
+ auto [Def1, Decl1] = Table.createImportIDs("mod", 123U);
+ auto [Def2, Decl2] = Table.createImportIDs("mod", 123U);
+
+ // Verify we get the same IDs back.
+ EXPECT_EQ(Def1, Def2);
+ EXPECT_EQ(Decl1, Decl2);
+}
+
+TEST(ImportIDTableTests, Missing) {
+ FunctionImporter::ImportIDTable Table;
+
+ auto Result = Table.getImportIDs("mod", 123U);
+
+ // Verify that we get std::nullopt for a non-existent pair.
+ EXPECT_EQ(Result, std::nullopt);
+}
More information about the llvm-commits
mailing list