[clang] [clang] Optimize castToDeclContext for 2% improvement in build times (PR #76825)

Pol M via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 4 06:20:59 PST 2024


https://github.com/Destroyerrrocket updated https://github.com/llvm/llvm-project/pull/76825

>From 2afecb35d80df788641c1b45cb74077037ff075f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pol=20Marcet=20Sard=C3=A0?= <polmarcetsarda at gmail.com>
Date: Wed, 3 Jan 2024 16:00:42 +0100
Subject: [PATCH] [clang] Optimize castToDeclContext for 2% improvement in
 build times

castToDeclContext is a heavily used function, and as such, it needs to
be kept as slim as feasible to preserve as much performance as possible.
To this end, it was observed that the function was generating suboptimal
assembly code, and putting the most common execution path in the longest
sequence of instructions. This patch addresses this by guiding the
compiler towards generating a lookup table of offsets, which can be used
to perform an addition on the pointer. This results in a 1-2%
improvement on debug builds (and a negligible improvement on release).

To achieve this, the switch was simplified to flatten the if statements
in the default branch. In order to make the values of the switch more
compact, encouraging LLVM to generate a look-up table instead of a jump
table, the AST TableGen generator was modified so it can take order
priority based on class inheritance. This combination allowed for a more
optimal generation of the function. Of note, 2 other functions with an
equivalent structure also needed to be modified.

Please refer to the issue/PR for exact assembly results:
https://github.com/llvm/llvm-project/issues/76824
https://github.com/llvm/llvm-project/pull/76825

Reviewers: Cor3ntin & Endilll
---
 clang/include/clang/AST/DeclCXX.h             |   8 ++
 clang/lib/AST/DeclBase.cpp                    |  50 +++------
 clang/utils/TableGen/ClangASTNodesEmitter.cpp | 103 +++++++++++-------
 clang/utils/TableGen/TableGen.cpp             |   3 +-
 clang/utils/TableGen/TableGenBackends.h       |  10 +-
 5 files changed, 97 insertions(+), 77 deletions(-)

diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index 432293583576b5..984a4d8bab5e77 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -2044,6 +2044,14 @@ class RequiresExprBodyDecl : public Decl, public DeclContext {
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
   static bool classofKind(Kind K) { return K == RequiresExprBody; }
+
+  static DeclContext *castToDeclContext(const RequiresExprBodyDecl *D) {
+    return static_cast<DeclContext *>(const_cast<RequiresExprBodyDecl *>(D));
+  }
+
+  static RequiresExprBodyDecl *castFromDeclContext(const DeclContext *DC) {
+    return static_cast<RequiresExprBodyDecl *>(const_cast<DeclContext *>(DC));
+  }
 };
 
 /// Represents a static or instance method of a struct/union/class.
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 5e03f0223d311c..b1733c2d052a65 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -930,20 +930,14 @@ const AttrVec &Decl::getAttrs() const {
 
 Decl *Decl::castFromDeclContext (const DeclContext *D) {
   Decl::Kind DK = D->getDeclKind();
-  switch(DK) {
-#define DECL(NAME, BASE)
-#define DECL_CONTEXT(NAME) \
-    case Decl::NAME:       \
-      return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D));
-#define DECL_CONTEXT_BASE(NAME)
-#include "clang/AST/DeclNodes.inc"
-    default:
+  switch (DK) {
 #define DECL(NAME, BASE)
-#define DECL_CONTEXT_BASE(NAME)                  \
-      if (DK >= first##NAME && DK <= last##NAME) \
-        return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D));
+#define DECL_CONTEXT(NAME)                                                     \
+  case Decl::NAME:                                                             \
+    return static_cast<NAME##Decl *>(const_cast<DeclContext *>(D));
 #include "clang/AST/DeclNodes.inc"
-      llvm_unreachable("a decl that inherits DeclContext isn't handled");
+  default:
+    llvm_unreachable("a decl that inherits DeclContext isn't handled");
   }
 }
 
@@ -951,18 +945,12 @@ DeclContext *Decl::castToDeclContext(const Decl *D) {
   Decl::Kind DK = D->getKind();
   switch(DK) {
 #define DECL(NAME, BASE)
-#define DECL_CONTEXT(NAME) \
-    case Decl::NAME:       \
-      return static_cast<NAME##Decl *>(const_cast<Decl *>(D));
-#define DECL_CONTEXT_BASE(NAME)
+#define DECL_CONTEXT(NAME)                                                     \
+  case Decl::NAME:                                                             \
+    return static_cast<NAME##Decl *>(const_cast<Decl *>(D));
 #include "clang/AST/DeclNodes.inc"
-    default:
-#define DECL(NAME, BASE)
-#define DECL_CONTEXT_BASE(NAME)                                   \
-      if (DK >= first##NAME && DK <= last##NAME)                  \
-        return static_cast<NAME##Decl *>(const_cast<Decl *>(D));
-#include "clang/AST/DeclNodes.inc"
-      llvm_unreachable("a decl that inherits DeclContext isn't handled");
+  default:
+    llvm_unreachable("a decl that inherits DeclContext isn't handled");
   }
 }
 
@@ -1129,20 +1117,14 @@ DeclContext::DeclContext(Decl::Kind K) {
 }
 
 bool DeclContext::classof(const Decl *D) {
-  switch (D->getKind()) {
+  Decl::Kind DK = D->getKind();
+  switch (DK) {
 #define DECL(NAME, BASE)
 #define DECL_CONTEXT(NAME) case Decl::NAME:
-#define DECL_CONTEXT_BASE(NAME)
 #include "clang/AST/DeclNodes.inc"
-      return true;
-    default:
-#define DECL(NAME, BASE)
-#define DECL_CONTEXT_BASE(NAME)                 \
-      if (D->getKind() >= Decl::first##NAME &&  \
-          D->getKind() <= Decl::last##NAME)     \
-        return true;
-#include "clang/AST/DeclNodes.inc"
-      return false;
+    return true;
+  default:
+    return false;
   }
 }
 
diff --git a/clang/utils/TableGen/ClangASTNodesEmitter.cpp b/clang/utils/TableGen/ClangASTNodesEmitter.cpp
index 16a1c74b9d91ad..07ddafce329163 100644
--- a/clang/utils/TableGen/ClangASTNodesEmitter.cpp
+++ b/clang/utils/TableGen/ClangASTNodesEmitter.cpp
@@ -33,6 +33,7 @@ class ClangASTNodesEmitter {
   typedef std::multimap<ASTNode, ASTNode> ChildMap;
   typedef ChildMap::const_iterator ChildIterator;
 
+  std::set<ASTNode> PrioritizedClasses;
   RecordKeeper &Records;
   ASTNode Root;
   const std::string &NodeClassName;
@@ -70,8 +71,16 @@ class ClangASTNodesEmitter {
   std::pair<ASTNode, ASTNode> EmitNode(raw_ostream& OS, ASTNode Base);
 public:
   explicit ClangASTNodesEmitter(RecordKeeper &R, const std::string &N,
-                                const std::string &S)
-    : Records(R), NodeClassName(N), BaseSuffix(S) {}
+                                const std::string &S,
+                                std::string_view PriorizeIfSubclassOf)
+      : Records(R), NodeClassName(N), BaseSuffix(S) {
+    auto vecPrioritized =
+        PriorizeIfSubclassOf.empty()
+            ? std::vector<Record *>{}
+            : R.getAllDerivedDefinitions(PriorizeIfSubclassOf);
+    PrioritizedClasses =
+        std::set<ASTNode>(vecPrioritized.begin(), vecPrioritized.end());
+  }
 
   // run - Output the .inc file contents
   void run(raw_ostream &OS);
@@ -95,8 +104,23 @@ std::pair<ASTNode, ASTNode> ClangASTNodesEmitter::EmitNode(raw_ostream &OS,
   if (!Base.isAbstract())
     First = Last = Base;
 
+  auto comp = [this](ASTNode LHS, ASTNode RHS) {
+    auto LHSPrioritized = PrioritizedClasses.count(LHS) > 0;
+    auto RHSPrioritized = PrioritizedClasses.count(RHS) > 0;
+    if (LHSPrioritized && !RHSPrioritized)
+      return true;
+    if (!LHSPrioritized && RHSPrioritized)
+      return false;
+
+    return LHS.getName() > RHS.getName();
+  };
+  auto SortedChildren = std::set<ASTNode, decltype(comp)>(comp);
+
   for (; i != e; ++i) {
-    ASTNode Child = i->second;
+    SortedChildren.insert(i->second);
+  }
+
+  for (const auto &Child : SortedChildren) {
     bool Abstract = Child.isAbstract();
     std::string NodeName = macroName(std::string(Child.getName()));
 
@@ -148,9 +172,7 @@ void ClangASTNodesEmitter::deriveChildTree() {
   const std::vector<Record*> Stmts
     = Records.getAllDerivedDefinitions(NodeClassName);
 
-  for (unsigned i = 0, e = Stmts.size(); i != e; ++i) {
-    Record *R = Stmts[i];
-
+  for (auto *R : Stmts) {
     if (auto B = R->getValueAsOptionalDef(BaseFieldName))
       Tree.insert(std::make_pair(B, R));
     else if (Root)
@@ -182,9 +204,9 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) {
   OS << "#endif\n\n";
 
   OS << "#ifndef LAST_" << macroHierarchyName() << "_RANGE\n";
-  OS << "#  define LAST_" 
-     << macroHierarchyName() << "_RANGE(Base, First, Last) " 
-     << macroHierarchyName() << "_RANGE(Base, First, Last)\n";
+  OS << "#  define LAST_" << macroHierarchyName()
+     << "_RANGE(Base, First, Last) " << macroHierarchyName()
+     << "_RANGE(Base, First, Last)\n";
   OS << "#endif\n\n";
 
   EmitNode(OS, Root);
@@ -196,8 +218,20 @@ void ClangASTNodesEmitter::run(raw_ostream &OS) {
 }
 
 void clang::EmitClangASTNodes(RecordKeeper &RK, raw_ostream &OS,
-                              const std::string &N, const std::string &S) {
-  ClangASTNodesEmitter(RK, N, S).run(OS);
+                              const std::string &N, const std::string &S,
+                              std::string_view PriorizeIfSubclassOf) {
+  ClangASTNodesEmitter(RK, N, S, PriorizeIfSubclassOf).run(OS);
+}
+
+void printDeclContext(const std::multimap<Record *, Record *> &Tree,
+                      Record *DeclContext, raw_ostream &OS) {
+  if (!DeclContext->getValueAsBit(AbstractFieldName))
+    OS << "DECL_CONTEXT(" << DeclContext->getName() << ")\n";
+  auto i = Tree.lower_bound(DeclContext);
+  auto end = Tree.upper_bound(DeclContext);
+  for (; i != end; ++i) {
+    printDeclContext(Tree, i->second, OS);
+  }
 }
 
 // Emits and addendum to a .inc file to enumerate the clang declaration
@@ -210,38 +244,25 @@ void clang::EmitClangDeclContext(RecordKeeper &Records, raw_ostream &OS) {
   OS << "#ifndef DECL_CONTEXT\n";
   OS << "#  define DECL_CONTEXT(DECL)\n";
   OS << "#endif\n";
-  
-  OS << "#ifndef DECL_CONTEXT_BASE\n";
-  OS << "#  define DECL_CONTEXT_BASE(DECL) DECL_CONTEXT(DECL)\n";
-  OS << "#endif\n";
-  
-  typedef std::set<Record*> RecordSet;
-  typedef std::vector<Record*> RecordVector;
-  
-  RecordVector DeclContextsVector
-    = Records.getAllDerivedDefinitions(DeclContextNodeClassName);
-  RecordVector Decls = Records.getAllDerivedDefinitions(DeclNodeClassName);
-  RecordSet DeclContexts (DeclContextsVector.begin(), DeclContextsVector.end());
-   
-  for (RecordVector::iterator i = Decls.begin(), e = Decls.end(); i != e; ++i) {
-    Record *R = *i;
-
-    if (Record *B = R->getValueAsOptionalDef(BaseFieldName)) {
-      if (DeclContexts.find(B) != DeclContexts.end()) {
-        OS << "DECL_CONTEXT_BASE(" << B->getName() << ")\n";
-        DeclContexts.erase(B);
-      }
-    }
+
+  std::vector<Record *> DeclContextsVector =
+      Records.getAllDerivedDefinitions(DeclContextNodeClassName);
+  std::vector<Record *> Decls =
+      Records.getAllDerivedDefinitions(DeclNodeClassName);
+
+  std::multimap<Record *, Record *> Tree;
+
+  const std::vector<Record *> Stmts =
+      Records.getAllDerivedDefinitions(DeclNodeClassName);
+
+  for (auto *R : Stmts) {
+    if (auto *B = R->getValueAsOptionalDef(BaseFieldName))
+      Tree.insert(std::make_pair(B, R));
   }
 
-  // To keep identical order, RecordVector may be used
-  // instead of RecordSet.
-  for (RecordVector::iterator
-         i = DeclContextsVector.begin(), e = DeclContextsVector.end();
-       i != e; ++i)
-    if (DeclContexts.find(*i) != DeclContexts.end())
-      OS << "DECL_CONTEXT(" << (*i)->getName() << ")\n";
+  for (auto *DeclContext : DeclContextsVector) {
+    printDeclContext(Tree, DeclContext, OS);
+  }
 
   OS << "#undef DECL_CONTEXT\n";
-  OS << "#undef DECL_CONTEXT_BASE\n";
 }
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index c1f2ca15b595c0..3859555d647fd1 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -398,7 +398,8 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
     EmitClangASTNodes(Records, OS, CommentNodeClassName, "");
     break;
   case GenClangDeclNodes:
-    EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl");
+    EmitClangASTNodes(Records, OS, DeclNodeClassName, "Decl",
+                      DeclContextNodeClassName);
     EmitClangDeclContext(Records, OS);
     break;
   case GenClangStmtNodes:
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 35f2f04c1e818d..faa0c5d2cff9e3 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -25,8 +25,16 @@ class RecordKeeper;
 namespace clang {
 
 void EmitClangDeclContext(llvm::RecordKeeper &RK, llvm::raw_ostream &OS);
+/**
+  @param PriorizeIfSubclassOf These classes should be prioritized in the output.
+  This is useful to force enum generation/jump tables/lookup tables to be more
+  compact in both size and surrounding code in hot functions. An example use is
+  in Decl for classes that inherit from DeclContext, for functions like
+  castFromDeclContext.
+  */
 void EmitClangASTNodes(llvm::RecordKeeper &RK, llvm::raw_ostream &OS,
-                       const std::string &N, const std::string &S);
+                       const std::string &N, const std::string &S,
+                       std::string_view PriorizeIfSubclassOf = "");
 void EmitClangBasicReader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangBasicWriter(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitClangTypeNodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);



More information about the cfe-commits mailing list