[clang] [APINotes] Reduce memory footprint for Obj-C/C++ contexts (PR #99358)

Egor Zhdan via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 17 10:16:18 PDT 2024


https://github.com/egorzhdan created https://github.com/llvm/llvm-project/pull/99358

We were storing extraneous data for certain Objective-C/C++ entities. Specifically, for declarations (such as functions) that can be nested in another context, we were storing the kind of the parent context in addition to its ID. The ID alone is always sufficient.

This removes the logically incorrect usages of `ContextTableKey` that don't actually describe a context, but rather describe a single declaration. This introduces `SingleDeclTableKey` to store such entities in a more compact and reasonable way.

>From cec486ee4a00677c143ee70f4f4b5ef6e6c4f3ca Mon Sep 17 00:00:00 2001
From: Egor Zhdan <e_zhdan at apple.com>
Date: Wed, 17 Jul 2024 18:13:34 +0100
Subject: [PATCH] [APINotes] Reduce memory footprint for Obj-C/C++ contexts

We were storing extraneous data for certain Objective-C/C++ entities. Specifically, for declarations (such as functions) that can be nested in another context, we were storing the kind of the parent context in addition to its ID. The ID alone is always sufficient.

This removes the logically incorrect usages of `ContextTableKey` that don't actually describe a context, but rather describe a single declaration. This introduces `SingleDeclTableKey` to store such entities in a more compact and reasonable way.
---
 clang/lib/APINotes/APINotesFormat.h   | 56 +++++++++++++++++++++++----
 clang/lib/APINotes/APINotesReader.cpp | 32 ++++++---------
 clang/lib/APINotes/APINotesWriter.cpp | 46 +++++++++++-----------
 3 files changed, 83 insertions(+), 51 deletions(-)

diff --git a/clang/lib/APINotes/APINotesFormat.h b/clang/lib/APINotes/APINotesFormat.h
index e3aa76df8316c..da52a5670c0b3 100644
--- a/clang/lib/APINotes/APINotesFormat.h
+++ b/clang/lib/APINotes/APINotesFormat.h
@@ -24,7 +24,7 @@ const uint16_t VERSION_MAJOR = 0;
 /// API notes file minor version number.
 ///
 /// When the format changes IN ANY WAY, this number should be incremented.
-const uint16_t VERSION_MINOR = 26; // SwiftCopyable
+const uint16_t VERSION_MINOR = 27; // SingleDeclTableKey
 
 const uint8_t kSwiftCopyable = 1;
 const uint8_t kSwiftNonCopyable = 2;
@@ -269,12 +269,6 @@ struct ContextTableKey {
       : parentContextID(parentContextID), contextKind(contextKind),
         contextID(contextID) {}
 
-  ContextTableKey(std::optional<Context> context, IdentifierID nameID)
-      : parentContextID(context ? context->id.Value : (uint32_t)-1),
-        contextKind(context ? static_cast<uint8_t>(context->kind)
-                            : static_cast<uint8_t>(-1)),
-        contextID(nameID) {}
-
   llvm::hash_code hashValue() const {
     return llvm::hash_value(
         std::tuple{parentContextID, contextKind, contextID});
@@ -286,6 +280,31 @@ inline bool operator==(const ContextTableKey &lhs, const ContextTableKey &rhs) {
          lhs.contextKind == rhs.contextKind && lhs.contextID == rhs.contextID;
 }
 
+/// A stored Objective-C or C++ declaration, represented by the ID of its parent
+/// context, and the name of the declaration.
+struct SingleDeclTableKey {
+  uint32_t parentContextID;
+  uint32_t nameID;
+
+  SingleDeclTableKey() : parentContextID(-1), nameID(-1) {}
+
+  SingleDeclTableKey(uint32_t ParentContextID, uint32_t NameID)
+      : parentContextID(ParentContextID), nameID(NameID) {}
+
+  SingleDeclTableKey(std::optional<Context> ParentCtx, IdentifierID NameID)
+      : parentContextID(ParentCtx ? ParentCtx->id.Value : (uint32_t)-1),
+        nameID(NameID) {}
+
+  llvm::hash_code hashValue() const {
+    return llvm::hash_value(std::make_pair(parentContextID, nameID));
+  }
+};
+
+inline bool operator==(const SingleDeclTableKey &lhs,
+                       const SingleDeclTableKey &rhs) {
+  return lhs.parentContextID == rhs.parentContextID && lhs.nameID == rhs.nameID;
+}
+
 } // namespace api_notes
 } // namespace clang
 
@@ -341,6 +360,29 @@ template <> struct DenseMapInfo<clang::api_notes::ContextTableKey> {
     return lhs == rhs;
   }
 };
+
+template <> struct DenseMapInfo<clang::api_notes::SingleDeclTableKey> {
+  static inline clang::api_notes::SingleDeclTableKey getEmptyKey() {
+    return clang::api_notes::SingleDeclTableKey();
+  }
+
+  static inline clang::api_notes::SingleDeclTableKey getTombstoneKey() {
+    return clang::api_notes::SingleDeclTableKey{
+        DenseMapInfo<uint32_t>::getTombstoneKey(),
+        DenseMapInfo<uint32_t>::getTombstoneKey()};
+  }
+
+  static unsigned
+  getHashValue(const clang::api_notes::SingleDeclTableKey &value) {
+    return value.hashValue();
+  }
+
+  static bool isEqual(const clang::api_notes::SingleDeclTableKey &lhs,
+                      const clang::api_notes::SingleDeclTableKey &rhs) {
+    return lhs == rhs;
+  }
+};
+
 } // namespace llvm
 
 #endif
diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp
index 8454e092b55ac..7600738374840 100644
--- a/clang/lib/APINotes/APINotesReader.cpp
+++ b/clang/lib/APINotes/APINotesReader.cpp
@@ -429,15 +429,13 @@ class ObjCSelectorTableInfo {
 
 /// Used to deserialize the on-disk global variable table.
 class GlobalVariableTableInfo
-    : public VersionedTableInfo<GlobalVariableTableInfo, ContextTableKey,
+    : public VersionedTableInfo<GlobalVariableTableInfo, SingleDeclTableKey,
                                 GlobalVariableInfo> {
 public:
   static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) {
     auto CtxID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    auto ContextKind =
-        endian::readNext<uint8_t, llvm::endianness::little>(Data);
     auto NameID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    return {CtxID, ContextKind, NameID};
+    return {CtxID, NameID};
   }
 
   hash_value_type ComputeHash(internal_key_type Key) {
@@ -454,15 +452,13 @@ class GlobalVariableTableInfo
 
 /// Used to deserialize the on-disk global function table.
 class GlobalFunctionTableInfo
-    : public VersionedTableInfo<GlobalFunctionTableInfo, ContextTableKey,
+    : public VersionedTableInfo<GlobalFunctionTableInfo, SingleDeclTableKey,
                                 GlobalFunctionInfo> {
 public:
   static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) {
     auto CtxID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    auto ContextKind =
-        endian::readNext<uint8_t, llvm::endianness::little>(Data);
     auto NameID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    return {CtxID, ContextKind, NameID};
+    return {CtxID, NameID};
   }
 
   hash_value_type ComputeHash(internal_key_type Key) {
@@ -501,15 +497,13 @@ class EnumConstantTableInfo
 
 /// Used to deserialize the on-disk tag table.
 class TagTableInfo
-    : public VersionedTableInfo<TagTableInfo, ContextTableKey, TagInfo> {
+    : public VersionedTableInfo<TagTableInfo, SingleDeclTableKey, TagInfo> {
 public:
   static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) {
     auto CtxID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    auto ContextKind =
-        endian::readNext<uint8_t, llvm::endianness::little>(Data);
     auto NameID =
         endian::readNext<IdentifierID, llvm::endianness::little>(Data);
-    return {CtxID, ContextKind, NameID};
+    return {CtxID, NameID};
   }
 
   hash_value_type ComputeHash(internal_key_type Key) {
@@ -563,16 +557,14 @@ class TagTableInfo
 
 /// Used to deserialize the on-disk typedef table.
 class TypedefTableInfo
-    : public VersionedTableInfo<TypedefTableInfo, ContextTableKey,
+    : public VersionedTableInfo<TypedefTableInfo, SingleDeclTableKey,
                                 TypedefInfo> {
 public:
   static internal_key_type ReadKey(const uint8_t *Data, unsigned Length) {
     auto CtxID = endian::readNext<uint32_t, llvm::endianness::little>(Data);
-    auto ContextKind =
-        endian::readNext<uint8_t, llvm::endianness::little>(Data);
     auto nameID =
         endian::readNext<IdentifierID, llvm::endianness::little>(Data);
-    return {CtxID, ContextKind, nameID};
+    return {CtxID, nameID};
   }
 
   hash_value_type ComputeHash(internal_key_type Key) {
@@ -1929,7 +1921,7 @@ auto APINotesReader::lookupGlobalVariable(llvm::StringRef Name,
   if (!NameID)
     return std::nullopt;
 
-  ContextTableKey Key(Ctx, *NameID);
+  SingleDeclTableKey Key(Ctx, *NameID);
 
   auto Known = Implementation->GlobalVariableTable->find(Key);
   if (Known == Implementation->GlobalVariableTable->end())
@@ -1948,7 +1940,7 @@ auto APINotesReader::lookupGlobalFunction(llvm::StringRef Name,
   if (!NameID)
     return std::nullopt;
 
-  ContextTableKey Key(Ctx, *NameID);
+  SingleDeclTableKey Key(Ctx, *NameID);
 
   auto Known = Implementation->GlobalFunctionTable->find(Key);
   if (Known == Implementation->GlobalFunctionTable->end())
@@ -1982,7 +1974,7 @@ auto APINotesReader::lookupTag(llvm::StringRef Name, std::optional<Context> Ctx)
   if (!NameID)
     return std::nullopt;
 
-  ContextTableKey Key(Ctx, *NameID);
+  SingleDeclTableKey Key(Ctx, *NameID);
 
   auto Known = Implementation->TagTable->find(Key);
   if (Known == Implementation->TagTable->end())
@@ -2001,7 +1993,7 @@ auto APINotesReader::lookupTypedef(llvm::StringRef Name,
   if (!NameID)
     return std::nullopt;
 
-  ContextTableKey Key(Ctx, *NameID);
+  SingleDeclTableKey Key(Ctx, *NameID);
 
   auto Known = Implementation->TypedefTable->find(Key);
   if (Known == Implementation->TypedefTable->end())
diff --git a/clang/lib/APINotes/APINotesWriter.cpp b/clang/lib/APINotes/APINotesWriter.cpp
index 4053d515ef426..1090d3f20df21 100644
--- a/clang/lib/APINotes/APINotesWriter.cpp
+++ b/clang/lib/APINotes/APINotesWriter.cpp
@@ -75,17 +75,17 @@ class APINotesWriter::Implementation {
 
   /// Information about global variables.
   ///
-  /// Indexed by the context ID, contextKind, identifier ID.
+  /// Indexed by the context ID, identifier ID.
   llvm::DenseMap<
-      ContextTableKey,
+      SingleDeclTableKey,
       llvm::SmallVector<std::pair<VersionTuple, GlobalVariableInfo>, 1>>
       GlobalVariables;
 
   /// Information about global functions.
   ///
-  /// Indexed by the context ID, contextKind, identifier ID.
+  /// Indexed by the context ID, identifier ID.
   llvm::DenseMap<
-      ContextTableKey,
+      SingleDeclTableKey,
       llvm::SmallVector<std::pair<VersionTuple, GlobalFunctionInfo>, 1>>
       GlobalFunctions;
 
@@ -98,15 +98,15 @@ class APINotesWriter::Implementation {
 
   /// Information about tags.
   ///
-  /// Indexed by the context ID, contextKind, identifier ID.
-  llvm::DenseMap<ContextTableKey,
+  /// Indexed by the context ID, identifier ID.
+  llvm::DenseMap<SingleDeclTableKey,
                  llvm::SmallVector<std::pair<VersionTuple, TagInfo>, 1>>
       Tags;
 
   /// Information about typedefs.
   ///
-  /// Indexed by the context ID, contextKind, identifier ID.
-  llvm::DenseMap<ContextTableKey,
+  /// Indexed by the context ID, identifier ID.
+  llvm::DenseMap<SingleDeclTableKey,
                  llvm::SmallVector<std::pair<VersionTuple, TypedefInfo>, 1>>
       Typedefs;
 
@@ -865,18 +865,17 @@ void APINotesWriter::Implementation::writeObjCSelectorBlock(
 namespace {
 /// Used to serialize the on-disk global variable table.
 class GlobalVariableTableInfo
-    : public VersionedTableInfo<GlobalVariableTableInfo, ContextTableKey,
+    : public VersionedTableInfo<GlobalVariableTableInfo, SingleDeclTableKey,
                                 GlobalVariableInfo> {
 public:
   unsigned getKeyLength(key_type_ref) {
-    return sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint32_t);
+    return sizeof(uint32_t) + sizeof(uint32_t);
   }
 
   void EmitKey(raw_ostream &OS, key_type_ref Key, unsigned) {
     llvm::support::endian::Writer writer(OS, llvm::endianness::little);
     writer.write<uint32_t>(Key.parentContextID);
-    writer.write<uint8_t>(Key.contextKind);
-    writer.write<uint32_t>(Key.contextID);
+    writer.write<uint32_t>(Key.nameID);
   }
 
   hash_value_type ComputeHash(key_type_ref Key) {
@@ -979,18 +978,17 @@ void emitFunctionInfo(raw_ostream &OS, const FunctionInfo &FI) {
 
 /// Used to serialize the on-disk global function table.
 class GlobalFunctionTableInfo
-    : public VersionedTableInfo<GlobalFunctionTableInfo, ContextTableKey,
+    : public VersionedTableInfo<GlobalFunctionTableInfo, SingleDeclTableKey,
                                 GlobalFunctionInfo> {
 public:
   unsigned getKeyLength(key_type_ref) {
-    return sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint32_t);
+    return sizeof(uint32_t) + sizeof(uint32_t);
   }
 
   void EmitKey(raw_ostream &OS, key_type_ref Key, unsigned) {
     llvm::support::endian::Writer writer(OS, llvm::endianness::little);
     writer.write<uint32_t>(Key.parentContextID);
-    writer.write<uint8_t>(Key.contextKind);
-    writer.write<uint32_t>(Key.contextID);
+    writer.write<uint32_t>(Key.nameID);
   }
 
   hash_value_type ComputeHash(key_type_ref Key) {
@@ -1091,20 +1089,20 @@ void APINotesWriter::Implementation::writeEnumConstantBlock(
 namespace {
 template <typename Derived, typename UnversionedDataType>
 class CommonTypeTableInfo
-    : public VersionedTableInfo<Derived, ContextTableKey, UnversionedDataType> {
+    : public VersionedTableInfo<Derived, SingleDeclTableKey,
+                                UnversionedDataType> {
 public:
   using key_type_ref = typename CommonTypeTableInfo::key_type_ref;
   using hash_value_type = typename CommonTypeTableInfo::hash_value_type;
 
   unsigned getKeyLength(key_type_ref) {
-    return sizeof(uint32_t) + sizeof(uint8_t) + sizeof(IdentifierID);
+    return sizeof(uint32_t) + sizeof(IdentifierID);
   }
 
   void EmitKey(raw_ostream &OS, key_type_ref Key, unsigned) {
     llvm::support::endian::Writer writer(OS, llvm::endianness::little);
     writer.write<uint32_t>(Key.parentContextID);
-    writer.write<uint8_t>(Key.contextKind);
-    writer.write<IdentifierID>(Key.contextID);
+    writer.write<IdentifierID>(Key.nameID);
   }
 
   hash_value_type ComputeHash(key_type_ref Key) {
@@ -1351,7 +1349,7 @@ void APINotesWriter::addGlobalVariable(std::optional<Context> Ctx,
                                        const GlobalVariableInfo &Info,
                                        VersionTuple SwiftVersion) {
   IdentifierID VariableID = Implementation->getIdentifier(Name);
-  ContextTableKey Key(Ctx, VariableID);
+  SingleDeclTableKey Key(Ctx, VariableID);
   Implementation->GlobalVariables[Key].push_back({SwiftVersion, Info});
 }
 
@@ -1360,7 +1358,7 @@ void APINotesWriter::addGlobalFunction(std::optional<Context> Ctx,
                                        const GlobalFunctionInfo &Info,
                                        VersionTuple SwiftVersion) {
   IdentifierID NameID = Implementation->getIdentifier(Name);
-  ContextTableKey Key(Ctx, NameID);
+  SingleDeclTableKey Key(Ctx, NameID);
   Implementation->GlobalFunctions[Key].push_back({SwiftVersion, Info});
 }
 
@@ -1374,7 +1372,7 @@ void APINotesWriter::addEnumConstant(llvm::StringRef Name,
 void APINotesWriter::addTag(std::optional<Context> Ctx, llvm::StringRef Name,
                             const TagInfo &Info, VersionTuple SwiftVersion) {
   IdentifierID TagID = Implementation->getIdentifier(Name);
-  ContextTableKey Key(Ctx, TagID);
+  SingleDeclTableKey Key(Ctx, TagID);
   Implementation->Tags[Key].push_back({SwiftVersion, Info});
 }
 
@@ -1382,7 +1380,7 @@ void APINotesWriter::addTypedef(std::optional<Context> Ctx,
                                 llvm::StringRef Name, const TypedefInfo &Info,
                                 VersionTuple SwiftVersion) {
   IdentifierID TypedefID = Implementation->getIdentifier(Name);
-  ContextTableKey Key(Ctx, TypedefID);
+  SingleDeclTableKey Key(Ctx, TypedefID);
   Implementation->Typedefs[Key].push_back({SwiftVersion, Info});
 }
 } // namespace api_notes



More information about the cfe-commits mailing list