[clang] [clang] Refactor `IdentifierInfo::ObjcOrBuiltinID` (PR #71709)

via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 8 09:33:03 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Vlad Serebrennikov (Endilll)

<details>
<summary>Changes</summary>

This patch refactors how values are stored inside the `IdentifierInfo::ObjCOrBuiltinID` bit-field, and annotates it with `preferred_type`. In order to make the value easier for debuggers to interpret, a new `ObjCKeywordOrInterestingOrBuiltin` enum is added. The previous "layout" of this field couldn't be represented with this new enum, because it skipped over some arbitrary enumerators, so a new "layout" was invented based on the `ObjCKeywordOrInterestingOrBuiltin` enum. I believe the new layout is simpler than the old one.

---
Full diff: https://github.com/llvm/llvm-project/pull/71709.diff


1 Files Affected:

- (modified) clang/include/clang/Basic/IdentifierTable.h (+73-44) 


``````````diff
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 0898e7d39dd7dee..fa76228da2b143a 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
 
+#include "clang/Basic/Builtins.h"
 #include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/TokenKinds.h"
@@ -86,19 +87,26 @@ enum { IdentifierInfoAlignment = 8 };
 static constexpr int ObjCOrBuiltinIDBits = 16;
 
 /// The "layout" of ObjCOrBuiltinID is:
-///  - The first value (0) represents "not a special identifier".
-///  - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not
-///    including objc_not_keyword).
-///  - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent
-///    InterestingIdentifierKinds (not including not_interesting).
-///  - The rest of the values represent builtin IDs (not including NotBuiltin).
-static constexpr int FirstObjCKeywordID = 1;
-static constexpr int LastObjCKeywordID =
-    FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2;
-static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1;
-static constexpr int LastInterestingIdentifierID =
-    FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2;
-static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1;
+///  - ObjCKeywordKind enumerators
+///  - InterestingIdentifierKind enumerators
+///  - Builtin::ID enumerators
+///  - NonSpecialIdentifier
+enum class ObjCKeywordOrInterestingOrBuiltin {
+#define OBJC_AT_KEYWORD(X) objc_##X,
+#include "clang/Basic/TokenKinds.def"
+  NUM_OBJC_KEYWORDS,
+
+#define INTERESTING_IDENTIFIER(X) X,
+#include "clang/Basic/TokenKinds.def"
+  NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS,
+
+  NotBuiltin,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/Builtins.def"
+  FirstTSBuiltin,
+
+  NonSpecialIdentifier = 65534
+};
 
 /// One of these records is kept for each identifier that
 /// is lexed.  This contains information about whether the token was \#define'd,
@@ -113,9 +121,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   LLVM_PREFERRED_TYPE(tok::TokenKind)
   unsigned TokenID : 9;
 
-  // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
-  // First NUM_OBJC_KEYWORDS values are for Objective-C,
-  // the remaining values are for builtins.
+  LLVM_PREFERRED_TYPE(ObjCKeywordOrInterestingOrBuiltin)
   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
 
   // True if there is a #define for this.
@@ -198,13 +204,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
 
   IdentifierInfo()
-      : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
-        HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
-        IsPoisoned(false), IsCPPOperatorKeyword(false),
-        NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
-        FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
-        IsModulesImport(false), IsMangledOpenMPVariantName(false),
-        IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
+      : TokenID(tok::identifier),
+        ObjCOrBuiltinID(llvm::to_underlying(
+            ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)),
+        HasMacro(false), HadMacro(false), IsExtension(false),
+        IsFutureCompatKeyword(false), IsPoisoned(false),
+        IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
+        IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
+        RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
+        IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
+        IsRestrictExpansion(false), IsFinal(false) {}
 
 public:
   IdentifierInfo(const IdentifierInfo &) = delete;
@@ -332,42 +341,62 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   ///
   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
   tok::ObjCKeywordKind getObjCKeywordID() const {
-    static_assert(FirstObjCKeywordID == 1,
-                  "hard-coding this assumption to simplify code");
-    if (ObjCOrBuiltinID <= LastObjCKeywordID)
-      return tok::ObjCKeywordKind(ObjCOrBuiltinID);
-    else
-      return tok::objc_not_keyword;
+    auto Value =
+        static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+    if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS)
+      return static_cast<tok::ObjCKeywordKind>(ObjCOrBuiltinID);
+    return tok::objc_not_keyword;
+  }
+  void setObjCKeywordID(tok::ObjCKeywordKind ID) {
+    ObjCOrBuiltinID = ID;
+    assert(getObjCKeywordID() == ID && "ID too large for field!");
   }
-  void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
 
   /// Return a value indicating whether this is a builtin function.
-  ///
-  /// 0 is not-built-in. 1+ are specific builtin functions.
   unsigned getBuiltinID() const {
-    if (ObjCOrBuiltinID >= FirstBuiltinID)
-      return 1 + (ObjCOrBuiltinID - FirstBuiltinID);
-    else
-      return 0;
+    auto Value =
+        static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+    if (Value > ObjCKeywordOrInterestingOrBuiltin::
+                    NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS &&
+        Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)
+      return static_cast<Builtin::ID>(
+          ObjCOrBuiltinID - 1 -
+          llvm::to_underlying(
+              ObjCKeywordOrInterestingOrBuiltin::
+                  NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS));
+    return Builtin::ID::NotBuiltin;
   }
   void setBuiltinID(unsigned ID) {
-    assert(ID != 0);
-    ObjCOrBuiltinID = FirstBuiltinID + (ID - 1);
+    assert(ID != Builtin::ID::NotBuiltin);
+    ObjCOrBuiltinID =
+        ID + 1 +
+        llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::
+                                NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS);
     assert(getBuiltinID() == ID && "ID too large for field!");
   }
-  void clearBuiltinID() { ObjCOrBuiltinID = 0; }
+  void clearBuiltinID() {
+    ObjCOrBuiltinID = llvm::to_underlying(
+        ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier);
+  }
 
   tok::InterestingIdentifierKind getInterestingIdentifierID() const {
-    if (ObjCOrBuiltinID >= FirstInterestingIdentifierID &&
-        ObjCOrBuiltinID <= LastInterestingIdentifierID)
-      return tok::InterestingIdentifierKind(
-          1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID));
+    auto Value =
+        static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+    if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS &&
+        Value < ObjCKeywordOrInterestingOrBuiltin::
+                    NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS)
+      return static_cast<tok::InterestingIdentifierKind>(
+          ObjCOrBuiltinID - 1 -
+          llvm::to_underlying(
+              ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS));
     else
       return tok::not_interesting;
   }
   void setInterestingIdentifierID(unsigned ID) {
     assert(ID != tok::not_interesting);
-    ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1);
+    ObjCOrBuiltinID = ID + 1 +
+                      llvm::to_underlying(
+                          ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS);
     assert(getInterestingIdentifierID() == ID && "ID too large for field!");
   }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/71709


More information about the cfe-commits mailing list