[clang] [clang] Refactor `IdentifierInfo::ObjcOrBuiltinID` (PR #71709)
Vlad Serebrennikov via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 9 12:36:07 PST 2024
https://github.com/Endilll updated https://github.com/llvm/llvm-project/pull/71709
>From 2823d38544d18213b5bf48c67e4eedd52acce850 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Wed, 8 Nov 2023 20:30:37 +0300
Subject: [PATCH 1/3] [clang] Refactor `IdentifierInfo::ObjcOrBuiltinID`
This patch refactors how values are stored inside `IdentifierInfo::ObjcOrBuiltinID` bit-field, and annotates it with `preferred_type`. In order to make the value easier to interpret by debuggers, a new `ObjCKeywordOrInterestingOrBuiltin` is added. Previous "layout" of this fields couldn't be represented with this new enum, because it skipped over some arbitrary enumerators, so a new "layout" was invented based on `ObjCKeywordOrInterestingOrBuiltin` enum. I believe the new layout is simpler than the new one.
---
clang/include/clang/Basic/IdentifierTable.h | 117 ++++++++++++--------
1 file changed, 73 insertions(+), 44 deletions(-)
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 0898e7d39dd7de..fa76228da2b143 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
+#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
@@ -86,19 +87,26 @@ enum { IdentifierInfoAlignment = 8 };
static constexpr int ObjCOrBuiltinIDBits = 16;
/// The "layout" of ObjCOrBuiltinID is:
-/// - The first value (0) represents "not a special identifier".
-/// - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not
-/// including objc_not_keyword).
-/// - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent
-/// InterestingIdentifierKinds (not including not_interesting).
-/// - The rest of the values represent builtin IDs (not including NotBuiltin).
-static constexpr int FirstObjCKeywordID = 1;
-static constexpr int LastObjCKeywordID =
- FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2;
-static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1;
-static constexpr int LastInterestingIdentifierID =
- FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2;
-static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1;
+/// - ObjCKeywordKind enumerators
+/// - InterestingIdentifierKind enumerators
+/// - Builtin::ID enumerators
+/// - NonSpecialIdentifier
+enum class ObjCKeywordOrInterestingOrBuiltin {
+#define OBJC_AT_KEYWORD(X) objc_##X,
+#include "clang/Basic/TokenKinds.def"
+ NUM_OBJC_KEYWORDS,
+
+#define INTERESTING_IDENTIFIER(X) X,
+#include "clang/Basic/TokenKinds.def"
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS,
+
+ NotBuiltin,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/Builtins.def"
+ FirstTSBuiltin,
+
+ NonSpecialIdentifier = 65534
+};
/// One of these records is kept for each identifier that
/// is lexed. This contains information about whether the token was \#define'd,
@@ -113,9 +121,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(tok::TokenKind)
unsigned TokenID : 9;
- // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
- // First NUM_OBJC_KEYWORDS values are for Objective-C,
- // the remaining values are for builtins.
+ LLVM_PREFERRED_TYPE(ObjCKeywordOrInterestingOrBuiltin)
unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
// True if there is a #define for this.
@@ -198,13 +204,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
IdentifierInfo()
- : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
- HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
- IsPoisoned(false), IsCPPOperatorKeyword(false),
- NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
- FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
- IsModulesImport(false), IsMangledOpenMPVariantName(false),
- IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
+ : TokenID(tok::identifier),
+ ObjCOrBuiltinID(llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)),
+ HasMacro(false), HadMacro(false), IsExtension(false),
+ IsFutureCompatKeyword(false), IsPoisoned(false),
+ IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
+ IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
+ RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
+ IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
+ IsRestrictExpansion(false), IsFinal(false) {}
public:
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -332,42 +341,62 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
///
/// For example, 'class' will return tok::objc_class if ObjC is enabled.
tok::ObjCKeywordKind getObjCKeywordID() const {
- static_assert(FirstObjCKeywordID == 1,
- "hard-coding this assumption to simplify code");
- if (ObjCOrBuiltinID <= LastObjCKeywordID)
- return tok::ObjCKeywordKind(ObjCOrBuiltinID);
- else
- return tok::objc_not_keyword;
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS)
+ return static_cast<tok::ObjCKeywordKind>(ObjCOrBuiltinID);
+ return tok::objc_not_keyword;
+ }
+ void setObjCKeywordID(tok::ObjCKeywordKind ID) {
+ ObjCOrBuiltinID = ID;
+ assert(getObjCKeywordID() == ID && "ID too large for field!");
}
- void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
/// Return a value indicating whether this is a builtin function.
- ///
- /// 0 is not-built-in. 1+ are specific builtin functions.
unsigned getBuiltinID() const {
- if (ObjCOrBuiltinID >= FirstBuiltinID)
- return 1 + (ObjCOrBuiltinID - FirstBuiltinID);
- else
- return 0;
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value > ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS &&
+ Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)
+ return static_cast<Builtin::ID>(
+ ObjCOrBuiltinID - 1 -
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS));
+ return Builtin::ID::NotBuiltin;
}
void setBuiltinID(unsigned ID) {
- assert(ID != 0);
- ObjCOrBuiltinID = FirstBuiltinID + (ID - 1);
+ assert(ID != Builtin::ID::NotBuiltin);
+ ObjCOrBuiltinID =
+ ID + 1 +
+ llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS);
assert(getBuiltinID() == ID && "ID too large for field!");
}
- void clearBuiltinID() { ObjCOrBuiltinID = 0; }
+ void clearBuiltinID() {
+ ObjCOrBuiltinID = llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier);
+ }
tok::InterestingIdentifierKind getInterestingIdentifierID() const {
- if (ObjCOrBuiltinID >= FirstInterestingIdentifierID &&
- ObjCOrBuiltinID <= LastInterestingIdentifierID)
- return tok::InterestingIdentifierKind(
- 1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID));
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS &&
+ Value < ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS)
+ return static_cast<tok::InterestingIdentifierKind>(
+ ObjCOrBuiltinID - 1 -
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS));
else
return tok::not_interesting;
}
void setInterestingIdentifierID(unsigned ID) {
assert(ID != tok::not_interesting);
- ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1);
+ ObjCOrBuiltinID = ID + 1 +
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS);
assert(getInterestingIdentifierID() == ID && "ID too large for field!");
}
>From 4d76f2d881fedd66d63f3c0b4d9feadd75004306 Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Thu, 16 Nov 2023 14:45:37 +0300
Subject: [PATCH 2/3] Fix checking `getObjCOrBuiltinID()` against zero
---
clang/lib/Serialization/ASTReader.cpp | 6 +++++-
clang/lib/Serialization/ASTWriter.cpp | 6 +++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 42b48d230af7a9..5b6244cf6a596f 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -988,7 +988,11 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II,
bool IsModule) {
return II.hadMacroDefinition() || II.isPoisoned() ||
- (!IsModule && II.getObjCOrBuiltinID()) ||
+ (!IsModule &&
+ II.getInterestingIdentifierID() !=
+ tok::InterestingIdentifierKind::not_interesting &&
+ II.getBuiltinID() != Builtin::ID::NotBuiltin &&
+ II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) ||
II.hasRevertedTokenIDToIdentifier() ||
(!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) &&
II.getFETokenInfo());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1e86566d81fbc0..ee2b77c633ad47 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3575,7 +3575,11 @@ class ASTIdentifierTableTrait {
/// to check that.
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
if (MacroOffset || II->isPoisoned() ||
- (!IsModule && II->getObjCOrBuiltinID()) ||
+ (!IsModule &&
+ II->getInterestingIdentifierID() !=
+ tok::InterestingIdentifierKind::not_interesting &&
+ II->getBuiltinID() != Builtin::ID::NotBuiltin &&
+ II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) ||
II->hasRevertedTokenIDToIdentifier() ||
(NeedDecls && II->getFETokenInfo()))
return true;
>From 6811d5067beea7e1df2b5834916e4e12720763fd Mon Sep 17 00:00:00 2001
From: Vlad Serebrennikov <serebrennikov.vladislav at gmail.com>
Date: Fri, 17 Nov 2023 13:11:51 +0300
Subject: [PATCH 3/3] Simplify `isInterestingIdentifier` implementation
---
clang/lib/Serialization/ASTReader.cpp | 7 ++-----
clang/lib/Serialization/ASTWriter.cpp | 7 ++-----
2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 5b6244cf6a596f..5fd0a3f5532204 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -988,11 +988,8 @@ ASTIdentifierLookupTraitBase::ReadKey(const unsigned char* d, unsigned n) {
static bool isInterestingIdentifier(ASTReader &Reader, IdentifierInfo &II,
bool IsModule) {
return II.hadMacroDefinition() || II.isPoisoned() ||
- (!IsModule &&
- II.getInterestingIdentifierID() !=
- tok::InterestingIdentifierKind::not_interesting &&
- II.getBuiltinID() != Builtin::ID::NotBuiltin &&
- II.getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) ||
+ (!IsModule && II.getInterestingIdentifierID() !=
+ tok::InterestingIdentifierKind::not_interesting) ||
II.hasRevertedTokenIDToIdentifier() ||
(!(IsModule && Reader.getPreprocessor().getLangOpts().CPlusPlus) &&
II.getFETokenInfo());
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ee2b77c633ad47..1a9963ec674b3c 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3575,11 +3575,8 @@ class ASTIdentifierTableTrait {
/// to check that.
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) {
if (MacroOffset || II->isPoisoned() ||
- (!IsModule &&
- II->getInterestingIdentifierID() !=
- tok::InterestingIdentifierKind::not_interesting &&
- II->getBuiltinID() != Builtin::ID::NotBuiltin &&
- II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword) ||
+ (!IsModule && II->getInterestingIdentifierID() !=
+ tok::InterestingIdentifierKind::not_interesting) ||
II->hasRevertedTokenIDToIdentifier() ||
(NeedDecls && II->getFETokenInfo()))
return true;
More information about the cfe-commits
mailing list