[clang] [clang] Change representation of CurLexerKind (PR #70381)

via cfe-commits cfe-commits at lists.llvm.org
Sun Oct 29 12:02:19 PDT 2023


https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/70381

>From 3fe63f81fcb999681daa11b2890c82fda3aaeef5 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Thu, 26 Oct 2023 22:31:43 +0200
Subject: [PATCH 1/3] [clang] Change representation of CurLexerKind

Previous representation used an enumeration combined to a switch to
dispatch to the appropriate lexer.

Use function pointer so that the dispatching is just an indirect call,
which is actually better because lexing is a costly task compared to a
function call.

This also makes the code slightly cleaner, speedup on
compile time tracker are consistent and range form -0.05% to -0.20%
for NewPM-O0-g, see

        https://llvm-compile-time-tracker.com/compare.php?from=f9906508bc4f05d3950e2219b4c56f6c078a61ef&to=608c85ec1283638db949d73e062bcc3355001ce4&stat=instructions:u

Considering just the preprocessing task, preprocessing the sqlite
amalgametion takes -0.6% instructions (according to valgrind
--tool=callgrind)
---
 clang/include/clang/Lex/Preprocessor.h    | 46 +++++++++++-----
 clang/lib/Lex/PPCaching.cpp               |  8 +--
 clang/lib/Lex/PPLexerChange.cpp           | 20 +++----
 clang/lib/Lex/Preprocessor.cpp            | 67 ++++++-----------------
 clang/utils/ClangVisualizers/clang.natvis |  2 +-
 5 files changed, 62 insertions(+), 81 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 18d88407ae12c90..634d3924aa2248b 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -751,13 +751,8 @@ class Preprocessor {
   std::unique_ptr<TokenLexer> CurTokenLexer;
 
   /// The kind of lexer we're currently working with.
-  enum CurLexerKind {
-    CLK_Lexer,
-    CLK_TokenLexer,
-    CLK_CachingLexer,
-    CLK_DependencyDirectivesLexer,
-    CLK_LexAfterModuleImport
-  } CurLexerKind = CLK_Lexer;
+  typedef bool (*LexerCallback)(Preprocessor &, Token &);
+  LexerCallback CurLexerCallback = &CLK_Lexer;
 
   /// If the current lexer is for a submodule that is being built, this
   /// is that submodule.
@@ -767,7 +762,7 @@ class Preprocessor {
   /// \#included, and macros currently being expanded from, not counting
   /// CurLexer/CurTokenLexer.
   struct IncludeStackInfo {
-    enum CurLexerKind           CurLexerKind;
+    LexerCallback CurLexerCallback;
     Module                     *TheSubmodule;
     std::unique_ptr<Lexer>      TheLexer;
     PreprocessorLexer          *ThePPLexer;
@@ -776,12 +771,12 @@ class Preprocessor {
 
     // The following constructors are completely useless copies of the default
     // versions, only needed to pacify MSVC.
-    IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
+    IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
                      std::unique_ptr<Lexer> &&TheLexer,
                      PreprocessorLexer *ThePPLexer,
                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
                      ConstSearchDirIterator TheDirLookup)
-        : CurLexerKind(std::move(CurLexerKind)),
+        : CurLexerCallback(std::move(CurLexerCallback)),
           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
           ThePPLexer(std::move(ThePPLexer)),
           TheTokenLexer(std::move(TheTokenLexer)),
@@ -1901,7 +1896,7 @@ class Preprocessor {
   /// Determine whether it's possible for a future call to Lex to produce an
   /// annotation token created by a previous call to EnterAnnotationToken.
   bool mightHavePendingAnnotationTokens() {
-    return CurLexerKind != CLK_Lexer;
+    return CurLexerCallback != CLK_Lexer;
   }
 
   /// Update the current token to represent the provided
@@ -1914,7 +1909,7 @@ class Preprocessor {
 
   /// Recompute the current lexer kind based on the CurLexer/
   /// CurTokenLexer pointers.
-  void recomputeCurLexerKind();
+  void recomputeCurLexerCallback();
 
   /// Returns true if incremental processing is enabled
   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
@@ -2430,8 +2425,9 @@ class Preprocessor {
   friend void TokenLexer::ExpandFunctionArguments();
 
   void PushIncludeMacroStack() {
-    assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
-    IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
+    assert(CurLexerCallback != CLK_CachingLexer &&
+           "cannot push a caching lexer");
+    IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
                                    std::move(CurLexer), CurPPLexer,
                                    std::move(CurTokenLexer), CurDirLookup);
     CurPPLexer = nullptr;
@@ -2443,7 +2439,7 @@ class Preprocessor {
     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
     CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
-    CurLexerKind = IncludeMacroStack.back().CurLexerKind;
+    CurLexerCallback = IncludeMacroStack.back().CurLexerCallback;
     IncludeMacroStack.pop_back();
   }
 
@@ -2899,6 +2895,26 @@ class Preprocessor {
   /// \return true iff this PP is currently in a "-Wunsafe-buffer-usage"
   ///          opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
+
+private:
+  /// Helper functions to forward lexing to the actual lexer. They all share the
+  /// same signature.
+  static bool CLK_Lexer(Preprocessor &P, Token &Result) {
+    return P.CurLexer->Lex(Result);
+  }
+  static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
+    return P.CurTokenLexer->Lex(Result);
+  }
+  static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
+    P.CachingLex(Result);
+    return true;
+  }
+  static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
+    return P.CurLexer->LexDependencyDirectiveToken(Result);
+  }
+  static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
+    return P.LexAfterModuleImport(Result);
+  }
 };
 
 /// Abstract base class that describes a handler that will receive
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
index e05e52ba9bb5362..b8219835a588bea 100644
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -42,7 +42,7 @@ void Preprocessor::Backtrack() {
          && "EnableBacktrackAtThisPos was not called!");
   CachedLexPos = BacktrackPositions.back();
   BacktrackPositions.pop_back();
-  recomputeCurLexerKind();
+  recomputeCurLexerCallback();
 }
 
 void Preprocessor::CachingLex(Token &Result) {
@@ -88,7 +88,7 @@ void Preprocessor::EnterCachingLexMode() {
          "entered caching lex mode while lexing something else");
 
   if (InCachingLexMode()) {
-    assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
+    assert(CurLexerCallback == CLK_CachingLexer && "Unexpected lexer kind");
     return;
   }
 
@@ -96,9 +96,9 @@ void Preprocessor::EnterCachingLexMode() {
 }
 
 void Preprocessor::EnterCachingLexModeUnchecked() {
-  assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
+  assert(CurLexerCallback != CLK_CachingLexer && "already in caching lex mode");
   PushIncludeMacroStack();
-  CurLexerKind = CLK_CachingLexer;
+  CurLexerCallback = CLK_CachingLexer;
 }
 
 
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index b8575e1adfc5b3f..1378880f8b0e5e0 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -122,10 +122,10 @@ void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
   CurPPLexer = TheLexer;
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
-  if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = TheLexer->isDependencyDirectivesLexer()
-                       ? CLK_DependencyDirectivesLexer
-                       : CLK_Lexer;
+  if (CurLexerCallback != CLK_LexAfterModuleImport)
+    CurLexerCallback = TheLexer->isDependencyDirectivesLexer()
+                           ? CLK_DependencyDirectivesLexer
+                           : CLK_Lexer;
 
   // Notify the client, if desired, that we are in a new source file.
   if (Callbacks && !CurLexer->Is_PragmaLexer) {
@@ -161,8 +161,8 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = CLK_TokenLexer;
+  if (CurLexerCallback != CLK_LexAfterModuleImport)
+    CurLexerCallback = CLK_TokenLexer;
 }
 
 /// EnterTokenStream - Add a "macro" context to the top of the include stack,
@@ -180,7 +180,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd,
 void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
                                     bool DisableMacroExpansion, bool OwnsTokens,
                                     bool IsReinject) {
-  if (CurLexerKind == CLK_CachingLexer) {
+  if (CurLexerCallback == CLK_CachingLexer) {
     if (CachedLexPos < CachedTokens.size()) {
       assert(IsReinject && "new tokens in the middle of cached stream");
       // We're entering tokens into the middle of our cached token stream. We
@@ -216,8 +216,8 @@ void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
   PushIncludeMacroStack();
   CurDirLookup = nullptr;
   CurTokenLexer = std::move(TokLexer);
-  if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = CLK_TokenLexer;
+  if (CurLexerCallback != CLK_LexAfterModuleImport)
+    CurLexerCallback = CLK_TokenLexer;
 }
 
 /// Compute the relative path that names the given file relative to
@@ -452,7 +452,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
       CurLexer.reset();
 
       CurPPLexer = nullptr;
-      recomputeCurLexerKind();
+      recomputeCurLexerCallback();
       return true;
     }
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ede4c51487ffbe7..f791b4d2363c9fe 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -380,15 +380,15 @@ StringRef Preprocessor::getLastMacroWithSpelling(
   return BestSpelling;
 }
 
-void Preprocessor::recomputeCurLexerKind() {
+void Preprocessor::recomputeCurLexerCallback() {
   if (CurLexer)
-    CurLexerKind = CurLexer->isDependencyDirectivesLexer()
-                       ? CLK_DependencyDirectivesLexer
-                       : CLK_Lexer;
+    CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
+                           ? CLK_DependencyDirectivesLexer
+                           : CLK_Lexer;
   else if (CurTokenLexer)
-    CurLexerKind = CLK_TokenLexer;
+    CurLexerCallback = CLK_TokenLexer;
   else
-    CurLexerKind = CLK_CachingLexer;
+    CurLexerCallback = CLK_CachingLexer;
 }
 
 bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
@@ -643,23 +643,7 @@ void Preprocessor::SkipTokensWhileUsingPCH() {
   while (true) {
     bool InPredefines =
         (CurLexer && CurLexer->getFileID() == getPredefinesFileID());
-    switch (CurLexerKind) {
-    case CLK_Lexer:
-      CurLexer->Lex(Tok);
-     break;
-    case CLK_TokenLexer:
-      CurTokenLexer->Lex(Tok);
-      break;
-    case CLK_CachingLexer:
-      CachingLex(Tok);
-      break;
-    case CLK_DependencyDirectivesLexer:
-      CurLexer->LexDependencyDirectiveToken(Tok);
-      break;
-    case CLK_LexAfterModuleImport:
-      LexAfterModuleImport(Tok);
-      break;
-    }
+    CurLexerCallback(*this, Tok);
     if (Tok.is(tok::eof) && !InPredefines) {
       ReachedMainFileEOF = true;
       break;
@@ -868,12 +852,12 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
        Identifier.is(tok::kw_import)) &&
       !InMacroArgs && !DisableMacroExpansion &&
       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
-      CurLexerKind != CLK_CachingLexer) {
+      CurLexerCallback != CLK_CachingLexer) {
     ModuleImportLoc = Identifier.getLocation();
     NamedModuleImportPath.clear();
     IsAtImport = true;
     ModuleImportExpectsIdentifier = true;
-    CurLexerKind = CLK_LexAfterModuleImport;
+    CurLexerCallback = CLK_LexAfterModuleImport;
   }
   return true;
 }
@@ -882,27 +866,8 @@ void Preprocessor::Lex(Token &Result) {
   ++LexLevel;
 
   // We loop here until a lex function returns a token; this avoids recursion.
-  bool ReturnedToken;
-  do {
-    switch (CurLexerKind) {
-    case CLK_Lexer:
-      ReturnedToken = CurLexer->Lex(Result);
-      break;
-    case CLK_TokenLexer:
-      ReturnedToken = CurTokenLexer->Lex(Result);
-      break;
-    case CLK_CachingLexer:
-      CachingLex(Result);
-      ReturnedToken = true;
-      break;
-    case CLK_DependencyDirectivesLexer:
-      ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result);
-      break;
-    case CLK_LexAfterModuleImport:
-      ReturnedToken = LexAfterModuleImport(Result);
-      break;
-    }
-  } while (!ReturnedToken);
+  while (!CurLexerCallback(*this, Result))
+    ;
 
   if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
     return;
@@ -965,7 +930,7 @@ void Preprocessor::Lex(Token &Result) {
           NamedModuleImportPath.clear();
           IsAtImport = false;
           ModuleImportExpectsIdentifier = true;
-          CurLexerKind = CLK_LexAfterModuleImport;
+          CurLexerCallback = CLK_LexAfterModuleImport;
         }
         break;
       } else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
@@ -1166,7 +1131,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
 /// We respond to a pp-import by importing macros from the named module.
 bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // Figure out what kind of lexer we actually have.
-  recomputeCurLexerKind();
+  recomputeCurLexerCallback();
 
   // Lex the next token. The header-name lexing rules are used at the start of
   // a pp-import.
@@ -1183,7 +1148,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
       Name += ":";
       NamedModuleImportPath.push_back(
           {getIdentifierInfo(Name), Result.getLocation()});
-      CurLexerKind = CLK_LexAfterModuleImport;
+      CurLexerCallback = CLK_LexAfterModuleImport;
       return true;
     }
   } else {
@@ -1283,7 +1248,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
     NamedModuleImportPath.push_back(
         std::make_pair(Result.getIdentifierInfo(), Result.getLocation()));
     ModuleImportExpectsIdentifier = false;
-    CurLexerKind = CLK_LexAfterModuleImport;
+    CurLexerCallback = CLK_LexAfterModuleImport;
     return true;
   }
 
@@ -1292,7 +1257,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // attribute-specifier-seq here under the Standard C++ Modules.)
   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
     ModuleImportExpectsIdentifier = true;
-    CurLexerKind = CLK_LexAfterModuleImport;
+    CurLexerCallback = CLK_LexAfterModuleImport;
     return true;
   }
 
diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis
index cbb63dc08de2338..44b43ec05d8af59 100644
--- a/clang/utils/ClangVisualizers/clang.natvis
+++ b/clang/utils/ClangVisualizers/clang.natvis
@@ -817,7 +817,7 @@ For later versions of Visual Studio, no setup is required-->
     <DisplayString IncludeView="cached"> {IncludeMacroStack._Mypair._Myval2._Mylast - 1,na}</DisplayString>
     <DisplayString Condition="CurLexer._Mypair._Myval2 != 0">{CurLexer._Mypair._Myval2,na}</DisplayString>
     <DisplayString Condition="CurTokenLexer._Mypair._Myval2 != 0">Expanding Macro: {CurTokenLexer._Mypair._Myval2,na}</DisplayString>
-    <!-- Can't use CurLexerKind because natvis sees the type rather than the variable -->
+    <!-- Can't use CurLexerCallback because natvis sees the type rather than the variable -->
     <DisplayString Condition="IncludeMacroStack._Mypair._Myval2._Mylast - IncludeMacroStack._Mypair._Myval2._Myfirst">
       {this,view(cached)}
     </DisplayString>

>From 7ad697255b075ecb18d09bdb6b465f4e1956909b Mon Sep 17 00:00:00 2001
From: serge-sans-paille <serge.guelton at telecom-bretagne.eu>
Date: Sun, 29 Oct 2023 18:17:55 +0000
Subject: [PATCH 2/3] fixup! [clang] Change representation of CurLexerKind

Co-authored-by: cor3ntin <corentinjabot at gmail.com>
---
 clang/include/clang/Lex/Preprocessor.h | 2 +-
 clang/lib/Lex/PPCaching.cpp            | 2 +-
 clang/lib/Lex/PPLexerChange.cpp        | 2 +-
 clang/lib/Lex/Preprocessor.cpp         | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 634d3924aa2248b..3019143731db3ba 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1909,7 +1909,7 @@ class Preprocessor {
 
   /// Recompute the current lexer kind based on the CurLexer/
   /// CurTokenLexer pointers.
-  void recomputeCurLexerCallback();
+ void recomputeCurLexerKind();
 
   /// Returns true if incremental processing is enabled
   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
index b8219835a588bea..f38ff62ebf437ce 100644
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -42,7 +42,7 @@ void Preprocessor::Backtrack() {
          && "EnableBacktrackAtThisPos was not called!");
   CachedLexPos = BacktrackPositions.back();
   BacktrackPositions.pop_back();
-  recomputeCurLexerCallback();
+  recomputeCurLexerKind();
 }
 
 void Preprocessor::CachingLex(Token &Result) {
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
index 1378880f8b0e5e0..3b1b6df1dbae4e6 100644
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -452,7 +452,7 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
       CurLexer.reset();
 
       CurPPLexer = nullptr;
-      recomputeCurLexerCallback();
+      recomputeCurLexerKind();
       return true;
     }
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index f791b4d2363c9fe..e37e370f5526b38 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -380,7 +380,7 @@ StringRef Preprocessor::getLastMacroWithSpelling(
   return BestSpelling;
 }
 
-void Preprocessor::recomputeCurLexerCallback() {
+void Preprocessor::recomputeCurLexerKind() {
   if (CurLexer)
     CurLexerCallback = CurLexer->isDependencyDirectivesLexer()
                            ? CLK_DependencyDirectivesLexer
@@ -1131,7 +1131,7 @@ void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
 /// We respond to a pp-import by importing macros from the named module.
 bool Preprocessor::LexAfterModuleImport(Token &Result) {
   // Figure out what kind of lexer we actually have.
-  recomputeCurLexerCallback();
+  recomputeCurLexerKind();
 
   // Lex the next token. The header-name lexing rules are used at the start of
   // a pp-import.

>From 5c35d1dba5d51e22cf9823e0e028e4780dec9370 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton at mozilla.com>
Date: Sun, 29 Oct 2023 20:01:48 +0100
Subject: [PATCH 3/3] fixup! [clang] Change representation of CurLexerKind

---
 clang/include/clang/Lex/Preprocessor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 3019143731db3ba..2d59918c039b115 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1909,7 +1909,7 @@ class Preprocessor {
 
   /// Recompute the current lexer kind based on the CurLexer/
   /// CurTokenLexer pointers.
- void recomputeCurLexerKind();
+  void recomputeCurLexerKind();
 
   /// Returns true if incremental processing is enabled
   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }



More information about the cfe-commits mailing list