[clang] [clang][NFC] Refactor `Selector` to use `PointerIntPair` inside (PR #69916)
Vlad Serebrennikov via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 24 09:23:30 PDT 2023
https://github.com/Endilll updated https://github.com/llvm/llvm-project/pull/69916
>From 58ebdda4e44b3fa2547d85a6cc9d5b0aa913b22a Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Mon, 23 Oct 2023 13:55:46 +0300
Subject: [PATCH 1/3] [clang][NFC] Refactor `Selector` to use `PointerIntPair`
inside
---
clang/include/clang/AST/DeclarationName.h | 3 +-
clang/include/clang/Basic/IdentifierTable.h | 261 +++++++++++---------
clang/lib/Basic/IdentifierTable.cpp | 59 +----
3 files changed, 145 insertions(+), 178 deletions(-)
diff --git a/clang/include/clang/AST/DeclarationName.h b/clang/include/clang/AST/DeclarationName.h
index b06931ea3e418f8..c9b01dc53964bd0 100644
--- a/clang/include/clang/AST/DeclarationName.h
+++ b/clang/include/clang/AST/DeclarationName.h
@@ -362,7 +362,8 @@ class DeclarationName {
}
/// Construct a declaration name from an Objective-C selector.
- DeclarationName(Selector Sel) : Ptr(Sel.InfoPtr) {}
+ DeclarationName(Selector Sel)
+ : Ptr(reinterpret_cast<uintptr_t>(Sel.InfoPtr.getOpaqueValue())) {}
/// Returns the name for all C++ using-directives.
static DeclarationName getUsingDirectiveName() {
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 1a1ffddf2b1c601..4972e64fee41e23 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -19,6 +19,9 @@
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -794,6 +797,121 @@ enum ObjCStringFormatFamily {
SFF_CFString
};
+namespace detail {
+
+/// DeclarationNameExtra is used as a base of various uncommon special names.
+/// This class is needed since DeclarationName has not enough space to store
+/// the kind of every possible names. Therefore the kind of common names is
+/// stored directly in DeclarationName, and the kind of uncommon names is
+/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
+/// DeclarationName needs the lower 3 bits to store the kind of common names.
+/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
+/// here is very likely to require changes in DeclarationName(Table).
+class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
+ friend class clang::DeclarationName;
+ friend class clang::DeclarationNameTable;
+
+protected:
+ /// The kind of "extra" information stored in the DeclarationName. See
+ /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
+ /// are used. Note that DeclarationName depends on the numerical values
+ /// of the enumerators in this enum. See DeclarationName::StoredNameKind
+ /// for more info.
+ enum ExtraKind {
+ CXXDeductionGuideName,
+ CXXLiteralOperatorName,
+ CXXUsingDirective,
+ ObjCMultiArgSelector
+ };
+
+ /// ExtraKindOrNumArgs has one of the following meaning:
+ /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
+ /// is in this case in fact either a CXXDeductionGuideNameExtra or
+ /// a CXXLiteralOperatorIdName.
+ ///
+ /// * It may be also name common to C++ using-directives (CXXUsingDirective),
+ ///
+ /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
+ /// the number of arguments in the Objective-C selector, in which
+ /// case the DeclarationNameExtra is also a MultiKeywordSelector.
+ unsigned ExtraKindOrNumArgs;
+
+ DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
+ DeclarationNameExtra(unsigned NumArgs)
+ : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
+
+ /// Return the corresponding ExtraKind.
+ ExtraKind getKind() const {
+ return static_cast<ExtraKind>(ExtraKindOrNumArgs >
+ (unsigned)ObjCMultiArgSelector
+ ? (unsigned)ObjCMultiArgSelector
+ : ExtraKindOrNumArgs);
+ }
+
+ /// Return the number of arguments in an ObjC selector. Only valid when this
+ /// is indeed an ObjCMultiArgSelector.
+ unsigned getNumArgs() const {
+ assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
+ "getNumArgs called but this is not an ObjC selector!");
+ return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
+ }
+};
+
+} // namespace detail
+
+/// One of these variable length records is kept for each
+/// selector containing more than one keyword. We use a folding set
+/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
+/// this class is provided strictly through Selector.
+class alignas(IdentifierInfoAlignment) MultiKeywordSelector
+ : public detail::DeclarationNameExtra,
+ public llvm::FoldingSetNode {
+ MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
+
+public:
+ // Constructor for keyword selectors.
+ MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
+ : DeclarationNameExtra(nKeys) {
+ assert((nKeys > 1) && "not a multi-keyword selector");
+
+ // Fill in the trailing keyword array.
+ IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
+ for (unsigned i = 0; i != nKeys; ++i)
+ KeyInfo[i] = IIV[i];
+ }
+
+ // getName - Derive the full selector name and return it.
+ std::string getName() const;
+
+ using DeclarationNameExtra::getNumArgs;
+
+ using keyword_iterator = IdentifierInfo *const *;
+
+ keyword_iterator keyword_begin() const {
+ return reinterpret_cast<keyword_iterator>(this + 1);
+ }
+
+ keyword_iterator keyword_end() const {
+ return keyword_begin() + getNumArgs();
+ }
+
+ IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
+ assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
+ return keyword_begin()[i];
+ }
+
+ static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
+ unsigned NumArgs) {
+ ID.AddInteger(NumArgs);
+ for (unsigned i = 0; i != NumArgs; ++i)
+ ID.AddPointer(ArgTys[i]);
+ }
+
+ void Profile(llvm::FoldingSetNodeID &ID) {
+ Profile(ID, keyword_begin(), getNumArgs());
+ }
+};
+
/// Smart pointer class that efficiently represents Objective-C method
/// names.
///
@@ -809,43 +927,42 @@ class Selector {
enum IdentifierInfoFlag {
// Empty selector = 0. Note that these enumeration values must
// correspond to the enumeration values of DeclarationName::StoredNameKind
- ZeroArg = 0x01,
- OneArg = 0x02,
+ ZeroArg = 0x01,
+ OneArg = 0x02,
MultiArg = 0x07,
- ArgFlags = 0x07
};
/// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
- /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
+ /// three bits of InfoPtr to store an IdentifierInfoFlag, but the highest
+ /// of them is also a discriminator for pointer type. Note that in any
/// case IdentifierInfo and MultiKeywordSelector are already aligned to
/// 8 bytes even on 32 bits archs because of DeclarationName.
- uintptr_t InfoPtr = 0;
+ llvm::PointerIntPair<
+ llvm::PointerUnion<IdentifierInfo *, MultiKeywordSelector *>, 2>
+ InfoPtr;
Selector(IdentifierInfo *II, unsigned nArgs) {
- InfoPtr = reinterpret_cast<uintptr_t>(II);
- assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
assert(nArgs < 2 && "nArgs not equal to 0/1");
- InfoPtr |= nArgs+1;
+ InfoPtr.setPointerAndInt(II, nArgs + 1);
}
Selector(MultiKeywordSelector *SI) {
- InfoPtr = reinterpret_cast<uintptr_t>(SI);
- assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
- InfoPtr |= MultiArg;
+ InfoPtr.setPointerAndInt(SI, MultiArg & 0b11);
}
IdentifierInfo *getAsIdentifierInfo() const {
- if (getIdentifierInfoFlag() < MultiArg)
- return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
- return nullptr;
+ return InfoPtr.getPointer().dyn_cast<IdentifierInfo *>();
}
MultiKeywordSelector *getMultiKeywordSelector() const {
- return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
+ return InfoPtr.getPointer().get<MultiKeywordSelector *>();
}
unsigned getIdentifierInfoFlag() const {
- return InfoPtr & ArgFlags;
+ unsigned new_flags = InfoPtr.getInt();
+ if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
+ new_flags |= MultiArg;
+ return new_flags;
}
static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
@@ -856,31 +973,27 @@ class Selector {
/// The default ctor should only be used when creating data structures that
/// will contain selectors.
Selector() = default;
- explicit Selector(uintptr_t V) : InfoPtr(V) {}
+ explicit Selector(uintptr_t V) {
+ InfoPtr.setFromOpaqueValue(reinterpret_cast<void *>(V));
+ }
/// operator==/!= - Indicate whether the specified selectors are identical.
bool operator==(Selector RHS) const {
- return InfoPtr == RHS.InfoPtr;
+ return InfoPtr.getOpaqueValue() == RHS.InfoPtr.getOpaqueValue();
}
bool operator!=(Selector RHS) const {
- return InfoPtr != RHS.InfoPtr;
+ return InfoPtr.getOpaqueValue() != RHS.InfoPtr.getOpaqueValue();
}
- void *getAsOpaquePtr() const {
- return reinterpret_cast<void*>(InfoPtr);
- }
+ void *getAsOpaquePtr() const { return InfoPtr.getOpaqueValue(); }
/// Determine whether this is the empty selector.
- bool isNull() const { return InfoPtr == 0; }
+ bool isNull() const { return InfoPtr.getOpaqueValue() == nullptr; }
// Predicates to identify the selector type.
- bool isKeywordSelector() const {
- return getIdentifierInfoFlag() != ZeroArg;
- }
+ bool isKeywordSelector() const { return InfoPtr.getInt() != ZeroArg; }
- bool isUnarySelector() const {
- return getIdentifierInfoFlag() == ZeroArg;
- }
+ bool isUnarySelector() const { return InfoPtr.getInt() == ZeroArg; }
/// If this selector is the specific keyword selector described by Names.
bool isKeywordSelector(ArrayRef<StringRef> Names) const;
@@ -991,68 +1104,6 @@ class SelectorTable {
static std::string getPropertyNameFromSetterSelector(Selector Sel);
};
-namespace detail {
-
-/// DeclarationNameExtra is used as a base of various uncommon special names.
-/// This class is needed since DeclarationName has not enough space to store
-/// the kind of every possible names. Therefore the kind of common names is
-/// stored directly in DeclarationName, and the kind of uncommon names is
-/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
-/// DeclarationName needs the lower 3 bits to store the kind of common names.
-/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
-/// here is very likely to require changes in DeclarationName(Table).
-class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
- friend class clang::DeclarationName;
- friend class clang::DeclarationNameTable;
-
-protected:
- /// The kind of "extra" information stored in the DeclarationName. See
- /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
- /// are used. Note that DeclarationName depends on the numerical values
- /// of the enumerators in this enum. See DeclarationName::StoredNameKind
- /// for more info.
- enum ExtraKind {
- CXXDeductionGuideName,
- CXXLiteralOperatorName,
- CXXUsingDirective,
- ObjCMultiArgSelector
- };
-
- /// ExtraKindOrNumArgs has one of the following meaning:
- /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
- /// is in this case in fact either a CXXDeductionGuideNameExtra or
- /// a CXXLiteralOperatorIdName.
- ///
- /// * It may be also name common to C++ using-directives (CXXUsingDirective),
- ///
- /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
- /// the number of arguments in the Objective-C selector, in which
- /// case the DeclarationNameExtra is also a MultiKeywordSelector.
- unsigned ExtraKindOrNumArgs;
-
- DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
- DeclarationNameExtra(unsigned NumArgs)
- : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
-
- /// Return the corresponding ExtraKind.
- ExtraKind getKind() const {
- return static_cast<ExtraKind>(ExtraKindOrNumArgs >
- (unsigned)ObjCMultiArgSelector
- ? (unsigned)ObjCMultiArgSelector
- : ExtraKindOrNumArgs);
- }
-
- /// Return the number of arguments in an ObjC selector. Only valid when this
- /// is indeed an ObjCMultiArgSelector.
- unsigned getNumArgs() const {
- assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
- "getNumArgs called but this is not an ObjC selector!");
- return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
- }
-};
-
-} // namespace detail
-
} // namespace clang
namespace llvm {
@@ -1089,34 +1140,6 @@ struct PointerLikeTypeTraits<clang::Selector> {
static constexpr int NumLowBitsAvailable = 0;
};
-// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
-// are not guaranteed to be 8-byte aligned.
-template<>
-struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
- static void *getAsVoidPointer(clang::IdentifierInfo* P) {
- return P;
- }
-
- static clang::IdentifierInfo *getFromVoidPointer(void *P) {
- return static_cast<clang::IdentifierInfo*>(P);
- }
-
- static constexpr int NumLowBitsAvailable = 1;
-};
-
-template<>
-struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
- static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
- return P;
- }
-
- static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
- return static_cast<const clang::IdentifierInfo*>(P);
- }
-
- static constexpr int NumLowBitsAvailable = 1;
-};
-
} // namespace llvm
#endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index e5599d545541085..c4c5a6eeced2832 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -512,63 +512,6 @@ unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
return DenseMapInfo<void*>::getHashValue(S.getAsOpaquePtr());
}
-namespace clang {
-
-/// One of these variable length records is kept for each
-/// selector containing more than one keyword. We use a folding set
-/// to unique aggregate names (keyword selectors in ObjC parlance). Access to
-/// this class is provided strictly through Selector.
-class alignas(IdentifierInfoAlignment) MultiKeywordSelector
- : public detail::DeclarationNameExtra,
- public llvm::FoldingSetNode {
- MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
-
-public:
- // Constructor for keyword selectors.
- MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
- : DeclarationNameExtra(nKeys) {
- assert((nKeys > 1) && "not a multi-keyword selector");
-
- // Fill in the trailing keyword array.
- IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
- for (unsigned i = 0; i != nKeys; ++i)
- KeyInfo[i] = IIV[i];
- }
-
- // getName - Derive the full selector name and return it.
- std::string getName() const;
-
- using DeclarationNameExtra::getNumArgs;
-
- using keyword_iterator = IdentifierInfo *const *;
-
- keyword_iterator keyword_begin() const {
- return reinterpret_cast<keyword_iterator>(this + 1);
- }
-
- keyword_iterator keyword_end() const {
- return keyword_begin() + getNumArgs();
- }
-
- IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
- assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
- return keyword_begin()[i];
- }
-
- static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
- unsigned NumArgs) {
- ID.AddInteger(NumArgs);
- for (unsigned i = 0; i != NumArgs; ++i)
- ID.AddPointer(ArgTys[i]);
- }
-
- void Profile(llvm::FoldingSetNodeID &ID) {
- Profile(ID, keyword_begin(), getNumArgs());
- }
-};
-
-} // namespace clang.
-
bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
assert(!Names.empty() && "must have >= 1 selector slots");
if (getNumArgs() != Names.size())
@@ -624,7 +567,7 @@ std::string MultiKeywordSelector::getName() const {
}
std::string Selector::getAsString() const {
- if (InfoPtr == 0)
+ if (isNull())
return "<null selector>";
if (getIdentifierInfoFlag() < MultiArg) {
>From 3cae7350cc4eed7c3f23e812afe3ee50f465fce3 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 24 Oct 2023 19:10:19 +0300
Subject: [PATCH 2/3] Add more comments suggested by Aaron
---
clang/include/clang/Basic/IdentifierTable.h | 25 ++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 4972e64fee41e23..4ad3d1a5f513310 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -929,14 +929,23 @@ class Selector {
// correspond to the enumeration values of DeclarationName::StoredNameKind
ZeroArg = 0x01,
OneArg = 0x02,
+ // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator value.
MultiArg = 0x07,
};
- /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
- /// three bits of InfoPtr to store an IdentifierInfoFlag, but the highest
- /// of them is also a discriminator for pointer type. Note that in any
- /// case IdentifierInfo and MultiKeywordSelector are already aligned to
- /// 8 bytes even on 32 bits archs because of DeclarationName.
+ /// IMPORTANT NOTE: the order of the types in this PointerUnion are
+ /// important! The DeclarationName class has bidirectional conversion
+ /// to/from Selector through an opaque pointer (void *) which corresponds
+ /// to this PointerIntPair. The discriminator bit from the PointerUnion
+ /// corresponds to the high bit in the MultiArg enumerator. So while this
+ /// PointerIntPair only has two bits for the integer (and we mask off the
+ /// high bit in `MultiArg` when it is used), that discrimator bit is
+ /// still necessary for the opaque conversion. The discriminator bit
+ /// from the PointerUnion and the two integer bits from the
+ /// PointerIntPair are also exposed via the DeclarationName::StoredNameKind
+ /// enumeration; see the comments in DeclarationName.h for more details.
+ /// Do not reorder or add any arguments to this template
+ /// without thoroughly understanding how tightly coupled these classes are.
llvm::PointerIntPair<
llvm::PointerUnion<IdentifierInfo *, MultiKeywordSelector *>, 2>
InfoPtr;
@@ -947,6 +956,9 @@ class Selector {
}
Selector(MultiKeywordSelector *SI) {
+ // IMPORTANT NOTE: we mask off the upper bit of this value because we only
+ // reserve two bits for the integer in the PointerIntPair. See the comments
+ // in `InfoPtr` for more details.
InfoPtr.setPointerAndInt(SI, MultiArg & 0b11);
}
@@ -960,6 +972,9 @@ class Selector {
unsigned getIdentifierInfoFlag() const {
unsigned new_flags = InfoPtr.getInt();
+ // IMPORTANT NOTE: We have to reconstitute this data rather than use the
+ // value directly from the PointerIntPair. See the comments in `InfoPtr`
+ // for more details.
if (InfoPtr.getPointer().is<MultiKeywordSelector *>())
new_flags |= MultiArg;
return new_flags;
>From 32ec561e990ebdde55976b8d1e562734cf7c8cf3 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Tue, 24 Oct 2023 19:23:13 +0300
Subject: [PATCH 3/3] Run clang-format
---
clang/include/clang/Basic/IdentifierTable.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 4ad3d1a5f513310..2eafe5938406749 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -929,7 +929,8 @@ class Selector {
// correspond to the enumeration values of DeclarationName::StoredNameKind
ZeroArg = 0x01,
OneArg = 0x02,
- // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator value.
+ // IMPORTANT NOTE: see comments in InfoPtr (below) about this enumerator
+ // value.
MultiArg = 0x07,
};
More information about the cfe-commits
mailing list