[clang] [clang][NFC] Refactor `Selector` to use `PointerIntPair` inside (PR #69916)

Vlad Serebrennikov via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 24 09:10:39 PDT 2023


https://github.com/Endilll updated https://github.com/llvm/llvm-project/pull/69916

>From 58ebdda4e44b3fa2547d85a6cc9d5b0aa913b22a Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Mon, 23 Oct 2023 13:55:46 +0300
Subject: [PATCH 1/2] [clang][NFC] Refactor `Selector` to use `PointerIntPair`
 inside

---
 clang/include/clang/AST/DeclarationName.h   |   3 +-
 clang/include/clang/Basic/IdentifierTable.h | 261 +++++++++++---------
 clang/lib/Basic/IdentifierTable.cpp         |  59 +----
 3 files changed, 145 insertions(+), 178 deletions(-)

diff --git a/clang/include/clang/AST/DeclarationName.h b/clang/include/clang/AST/DeclarationName.h
index b06931ea3e418f8..c9b01dc53964bd0 100644
--- a/clang/include/clang/AST/DeclarationName.h
+++ b/clang/include/clang/AST/DeclarationName.h
@@ -362,7 +362,8 @@ class DeclarationName {
   }
 
   /// Construct a declaration name from an Objective-C selector.
-  DeclarationName(Selector Sel) : Ptr(Sel.InfoPtr) {}
+  DeclarationName(Selector Sel)
+      : Ptr(reinterpret_cast<uintptr_t>(Sel.InfoPtr.getOpaqueValue())) {}
 
   /// Returns the name for all C++ using-directives.
   static DeclarationName getUsingDirectiveName() {
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 1a1ffddf2b1c601..4972e64fee41e23 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -19,6 +19,9 @@
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -794,6 +797,121 @@ enum ObjCStringFormatFamily {
   SFF_CFString
 };
 
+namespace detail {
+
+/// DeclarationNameExtra is used as a base of various uncommon special names.
+/// This class is needed since DeclarationName has not enough space to store
+/// the kind of every possible names. Therefore the kind of common names is
+/// stored directly in DeclarationName, and the kind of uncommon names is
+/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
+/// DeclarationName needs the lower 3 bits to store the kind of common names.
+/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
+/// here is very likely to require changes in DeclarationName(Table).
+class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
+  friend class clang::DeclarationName;
+  friend class clang::DeclarationNameTable;
+
+protected:
+  /// The kind of "extra" information stored in the DeclarationName. See
+  /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
+  /// are used. Note that DeclarationName depends on the numerical values
+  /// of the enumerators in this enum. See DeclarationName::StoredNameKind
+  /// for more info.
+  enum ExtraKind {
+    CXXDeductionGuideName,
+    CXXLiteralOperatorName,
+    CXXUsingDirective,
+    ObjCMultiArgSelector
+  };
+
+  /// ExtraKindOrNumArgs has one of the following meaning:
+  ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
+  ///    is in this case in fact either a CXXDeductionGuideNameExtra or
+  ///    a CXXLiteralOperatorIdName.
+  ///
+  ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
+  ///
+  ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
+  ///    the number of arguments in the Objective-C selector, in which
+  ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
+  unsigned ExtraKindOrNumArgs;
+
+  DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
+  DeclarationNameExtra(unsigned NumArgs)
+      : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
+
+  /// Return the corresponding ExtraKind.
+  ExtraKind getKind() const {
+    return static_cast<ExtraKind>(ExtraKindOrNumArgs >
+                                          (unsigned)ObjCMultiArgSelector
+                                      ? (unsigned)ObjCMultiArgSelector
+                                      : ExtraKindOrNumArgs);
+  }
+
+  /// Return the number of arguments in an ObjC selector. Only valid when this
+  /// is indeed an ObjCMultiArgSelector.
+  unsigned getNumArgs() const {
+    assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
+           "getNumArgs called but this is not an ObjC selector!");
+    return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
+  }
+};
+
+} // namespace detail
+
+/// One of these variable length records is kept for each
+/// selector containing more than one keyword. We use a folding set
+/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
+/// this class is provided strictly through Selector.
+class alignas(IdentifierInfoAlignment) MultiKeywordSelector
+    : public detail::DeclarationNameExtra,
+      public llvm::FoldingSetNode {
+  MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
+
+public:
+  // Constructor for keyword selectors.
+  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
+      : DeclarationNameExtra(nKeys) {
+    assert((nKeys > 1) && "not a multi-keyword selector");
+
+    // Fill in the trailing keyword array.
+    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
+    for (unsigned i = 0; i != nKeys; ++i)
+      KeyInfo[i] = IIV[i];
+  }
+
+  // getName - Derive the full selector name and return it.
+  std::string getName() const;
+
+  using DeclarationNameExtra::getNumArgs;
+
+  using keyword_iterator = IdentifierInfo *const *;
+
+  keyword_iterator keyword_begin() const {
+    return reinterpret_cast<keyword_iterator>(this + 1);
+  }
+
+  keyword_iterator keyword_end() const {
+    return keyword_begin() + getNumArgs();
+  }
+
+  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
+    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
+    return keyword_begin()[i];
+  }
+
+  static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
+                      unsigned NumArgs) {
+    ID.AddInteger(NumArgs);
+    for (unsigned i = 0; i != NumArgs; ++i)
+      ID.AddPointer(ArgTys[i]);
+  }
+
+  void Profile(llvm::FoldingSetNodeID &ID) {
+    Profile(ID, keyword_begin(), getNumArgs());
+  }
+};
+
 /// Smart pointer class that efficiently represents Objective-C method
 /// names.
 ///
@@ -809,43 +927,42 @@ class Selector {
   enum IdentifierInfoFlag {
     // Empty selector = 0. Note that these enumeration values must
     // correspond to the enumeration values of DeclarationName::StoredNameKind
-    ZeroArg  = 0x01,
-    OneArg   = 0x02,
+    ZeroArg = 0x01,
+    OneArg = 0x02,
     MultiArg = 0x07,
-    ArgFlags = 0x07
   };
 
   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
-  /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
+  /// three bits of InfoPtr to store an IdentifierInfoFlag, but the highest
+  /// of them is also a discriminator for pointer type. Note that in any
   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
   /// 8 bytes even on 32 bits archs because of DeclarationName.
-  uintptr_t InfoPtr = 0;
+  llvm::PointerIntPair<
+      llvm::PointerUnion<IdentifierInfo *, MultiKeywordSelector *>, 2>
+      InfoPtr;
 
   Selector(IdentifierInfo *II, unsigned nArgs) {
-    InfoPtr = reinterpret_cast<uintptr_t>(II);
-    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
     assert(nArgs < 2 && "nArgs not equal to 0/1");
-    InfoPtr |= nArgs+1;
+    InfoPtr.setPointerAndInt(II, nArgs + 1);
   }
 
   Selector(MultiKeywordSelector *SI) {
-    InfoPtr = reinterpret_cast<uintptr_t>(SI);
-    assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
-    InfoPtr |= MultiArg;
+    InfoPtr.setPointerAndInt(SI, MultiArg & 0b11);
   }
 
   IdentifierInfo *getAsIdentifierInfo() const {
-    if (getIdentifierInfoFlag() < MultiArg)
-      return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
-    return nullptr;
+    return InfoPtr.getPointer().dyn_cast<IdentifierInfo *>();
   }
 
   MultiKeywordSelector *getMultiKeywordSelector() const {
-    return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
+    return InfoPtr.getPointer().get<MultiKeywordSelector *>();
   }
 
   unsigned getIdentifierInfoFlag() const {
-    return InfoPtr & ArgFlags;
+    unsigned new_flags = InfoPtr.getInt();
+    if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
+      new_flags |= MultiArg;
+    return new_flags;
   }
 
   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
@@ -856,31 +973,27 @@ class Selector {
   /// The default ctor should only be used when creating data structures that
   ///  will contain selectors.
   Selector() = default;
-  explicit Selector(uintptr_t V) : InfoPtr(V) {}
+  explicit Selector(uintptr_t V) {
+    InfoPtr.setFromOpaqueValue(reinterpret_cast<void *>(V));
+  }
 
   /// operator==/!= - Indicate whether the specified selectors are identical.
   bool operator==(Selector RHS) const {
-    return InfoPtr == RHS.InfoPtr;
+    return InfoPtr.getOpaqueValue() == RHS.InfoPtr.getOpaqueValue();
   }
   bool operator!=(Selector RHS) const {
-    return InfoPtr != RHS.InfoPtr;
+    return InfoPtr.getOpaqueValue() != RHS.InfoPtr.getOpaqueValue();
   }
 
-  void *getAsOpaquePtr() const {
-    return reinterpret_cast<void*>(InfoPtr);
-  }
+  void *getAsOpaquePtr() const { return InfoPtr.getOpaqueValue(); }
 
   /// Determine whether this is the empty selector.
-  bool isNull() const { return InfoPtr == 0; }
+  bool isNull() const { return InfoPtr.getOpaqueValue() == nullptr; }
 
   // Predicates to identify the selector type.
-  bool isKeywordSelector() const {
-    return getIdentifierInfoFlag() != ZeroArg;
-  }
+  bool isKeywordSelector() const { return InfoPtr.getInt() != ZeroArg; }
 
-  bool isUnarySelector() const {
-    return getIdentifierInfoFlag() == ZeroArg;
-  }
+  bool isUnarySelector() const { return InfoPtr.getInt() == ZeroArg; }
 
   /// If this selector is the specific keyword selector described by Names.
   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
@@ -991,68 +1104,6 @@ class SelectorTable {
   static std::string getPropertyNameFromSetterSelector(Selector Sel);
 };
 
-namespace detail {
-
-/// DeclarationNameExtra is used as a base of various uncommon special names.
-/// This class is needed since DeclarationName has not enough space to store
-/// the kind of every possible names. Therefore the kind of common names is
-/// stored directly in DeclarationName, and the kind of uncommon names is
-/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
-/// DeclarationName needs the lower 3 bits to store the kind of common names.
-/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
-/// here is very likely to require changes in DeclarationName(Table).
-class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
-  friend class clang::DeclarationName;
-  friend class clang::DeclarationNameTable;
-
-protected:
-  /// The kind of "extra" information stored in the DeclarationName. See
-  /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
-  /// are used. Note that DeclarationName depends on the numerical values
-  /// of the enumerators in this enum. See DeclarationName::StoredNameKind
-  /// for more info.
-  enum ExtraKind {
-    CXXDeductionGuideName,
-    CXXLiteralOperatorName,
-    CXXUsingDirective,
-    ObjCMultiArgSelector
-  };
-
-  /// ExtraKindOrNumArgs has one of the following meaning:
-  ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
-  ///    is in this case in fact either a CXXDeductionGuideNameExtra or
-  ///    a CXXLiteralOperatorIdName.
-  ///
-  ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
-  ///
-  ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
-  ///    the number of arguments in the Objective-C selector, in which
-  ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
-  unsigned ExtraKindOrNumArgs;
-
-  DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
-  DeclarationNameExtra(unsigned NumArgs)
-      : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
-
-  /// Return the corresponding ExtraKind.
-  ExtraKind getKind() const {
-    return static_cast<ExtraKind>(ExtraKindOrNumArgs >
-                                          (unsigned)ObjCMultiArgSelector
-                                      ? (unsigned)ObjCMultiArgSelector
-                                      : ExtraKindOrNumArgs);
-  }
-
-  /// Return the number of arguments in an ObjC selector. Only valid when this
-  /// is indeed an ObjCMultiArgSelector.
-  unsigned getNumArgs() const {
-    assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
-           "getNumArgs called but this is not an ObjC selector!");
-    return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
-  }
-};
-
-} // namespace detail
-
 }  // namespace clang
 
 namespace llvm {
@@ -1089,34 +1140,6 @@ struct PointerLikeTypeTraits<clang::Selector> {
   static constexpr int NumLowBitsAvailable = 0;
 };
 
-// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
-// are not guaranteed to be 8-byte aligned.
-template<>
-struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
-  static void *getAsVoidPointer(clang::IdentifierInfo* P) {
-    return P;
-  }
-
-  static clang::IdentifierInfo *getFromVoidPointer(void *P) {
-    return static_cast<clang::IdentifierInfo*>(P);
-  }
-
-  static constexpr int NumLowBitsAvailable = 1;
-};
-
-template<>
-struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
-  static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
-    return P;
-  }
-
-  static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
-    return static_cast<const clang::IdentifierInfo*>(P);
-  }
-
-  static constexpr int NumLowBitsAvailable = 1;
-};
-
 } // namespace llvm
 
 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index e5599d545541085..c4c5a6eeced2832 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -512,63 +512,6 @@ unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
   return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
 }
 
-namespace clang {
-
-/// One of these variable length records is kept for each
-/// selector containing more than one keyword. We use a folding set
-/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
-/// this class is provided strictly through Selector.
-class alignas(IdentifierInfoAlignment) MultiKeywordSelector
-    : public detail::DeclarationNameExtra,
-      public llvm::FoldingSetNode {
-  MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
-
-public:
-  // Constructor for keyword selectors.
-  MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
-      : DeclarationNameExtra(nKeys) {
-    assert((nKeys > 1) && "not a multi-keyword selector");
-
-    // Fill in the trailing keyword array.
-    IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
-    for (unsigned i = 0; i != nKeys; ++i)
-      KeyInfo[i] = IIV[i];
-  }
-
-  // getName - Derive the full selector name and return it.
-  std::string getName() const;
-
-  using DeclarationNameExtra::getNumArgs;
-
-  using keyword_iterator = IdentifierInfo *const *;
-
-  keyword_iterator keyword_begin() const {
-    return reinterpret_cast<keyword_iterator>(this + 1);
-  }
-
-  keyword_iterator keyword_end() const {
-    return keyword_begin() + getNumArgs();
-  }
-
-  IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
-    assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
-    return keyword_begin()[i];
-  }
-
-  static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
-                      unsigned NumArgs) {
-    ID.AddInteger(NumArgs);
-    for (unsigned i = 0; i != NumArgs; ++i)
-      ID.AddPointer(ArgTys[i]);
-  }
-
-  void Profile(llvm::FoldingSetNodeID &ID) {
-    Profile(ID, keyword_begin(), getNumArgs());
-  }
-};
-
-} // namespace clang.
-
 bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
   assert(!Names.empty() && "must have >= 1 selector slots");
   if (getNumArgs() != Names.size())
@@ -624,7 +567,7 @@ std::string MultiKeywordSelector::getName() const {
 }
 
 std::string Selector::getAsString() const {
-  if (InfoPtr == 0)
+  if (isNull())
     return "<null selector>";
 
   if (getIdentifierInfoFlag() < MultiArg) {

>From 3cae7350cc4eed7c3f23e812afe3ee50f465fce3 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 24 Oct 2023 19:10:19 +0300
Subject: [PATCH 2/2] Add more comments suggested by Aaron

---
 clang/include/clang/Basic/IdentifierTable.h | 25 ++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 4972e64fee41e23..4ad3d1a5f513310 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -929,14 +929,23 @@ class Selector {
     // correspond to the enumeration values of DeclarationName::StoredNameKind
     ZeroArg = 0x01,
     OneArg = 0x02,
+    // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator value.
     MultiArg = 0x07,
   };
 
-  /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
-  /// three bits of InfoPtr to store an IdentifierInfoFlag, but the highest
-  /// of them is also a discriminator for pointer type. Note that in any
-  /// case IdentifierInfo and MultiKeywordSelector are already aligned to
-  /// 8 bytes even on 32 bits archs because of DeclarationName.
+  /// IMPORTANT NOTE: the order of the types in this PointerUnion are
+  /// important! The DeclarationName class has bidirectional conversion
+  /// to/from Selector through an opaque pointer (void *) which corresponds
+  /// to this PointerIntPair. The discriminator bit from the PointerUnion
+  /// corresponds to the high bit in the MultiArg enumerator. So while this
+  /// PointerIntPair only has two bits for the integer (and we mask off the
+  /// high bit in `MultiArg` when it is used), that discrimator bit is
+  /// still necessary for the opaque conversion. The discriminator bit
+  /// from the PointerUnion and the two integer bits from the
+  /// PointerIntPair are also exposed via the DeclarationName::StoredNameKind
+  /// enumeration; see the comments in DeclarationName.h for more details.
+  /// Do not reorder or add any arguments to this template
+  /// without thoroughly understanding how tightly coupled these classes are.
   llvm::PointerIntPair<
       llvm::PointerUnion<IdentifierInfo *, MultiKeywordSelector *>, 2>
       InfoPtr;
@@ -947,6 +956,9 @@ class Selector {
   }
 
   Selector(MultiKeywordSelector *SI) {
+    // IMPORTANT NOTE: we mask off the upper bit of this value because we only
+    // reserve two bits for the integer in the PointerIntPair. See the comments
+    // in `InfoPtr` for more details.
     InfoPtr.setPointerAndInt(SI, MultiArg & 0b11);
   }
 
@@ -960,6 +972,9 @@ class Selector {
 
   unsigned getIdentifierInfoFlag() const {
     unsigned new_flags = InfoPtr.getInt();
+    // IMPORTANT NOTE: We have to reconstitute this data rather than use the
+    // value directly from the PointerIntPair. See the comments in `InfoPtr`
+    // for more details.
     if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
       new_flags |= MultiArg;
     return new_flags;



More information about the cfe-commits mailing list