[clang] f7e19a5 - [Lex] Keep track of skipped preprocessor blocks and advance the lexer directly if they are revisited

Argyrios Kyrtzidis via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 13 21:46:54 PDT 2022


Author: Argyrios Kyrtzidis
Date: 2022-06-13T21:46:46-07:00
New Revision: f7e19a59284208712314a2d0702b48c445909130

URL: https://github.com/llvm/llvm-project/commit/f7e19a59284208712314a2d0702b48c445909130
DIFF: https://github.com/llvm/llvm-project/commit/f7e19a59284208712314a2d0702b48c445909130.diff

LOG: [Lex] Keep track of skipped preprocessor blocks and advance the lexer directly if they are revisited

This speeds up preprocessing; specifically, the time to preprocess the clang sources is reduced by about 36%,
as measured on an M1 Pro with a release+thinLTO build.

Differential Revision: https://reviews.llvm.org/D127379

Added: 
    

Modified: 
    clang/include/clang/Lex/Preprocessor.h
    clang/lib/Lex/PPDirectives.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 81d1481e88fa8..9bd4b40a60770 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -952,6 +952,18 @@ class Preprocessor {
   /// of that list.
   MacroInfoChain *MIChainHead = nullptr;
 
+  /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
+  /// This is used to guard against calling this function recursively.
+  ///
+  /// See comments at the use-site for more context about why it is needed.
+  bool SkippingExcludedConditionalBlock = false;
+
+  /// Keeps track of skipped range mappings that were recorded while skipping
+  /// excluded conditional directives. It maps the source buffer pointer at
+  /// the beginning of a skipped block, to the number of bytes that should be
+  /// skipped.
+  llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
+
   void updateOutOfDateIdentifier(IdentifierInfo &II) const;
 
 public:

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 1356dc097dfcc..70d001fbaec77 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -33,15 +33,16 @@
 #include "clang/Lex/Token.h"
 #include "clang/Lex/VariadicMacroSupport.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SaveAndRestore.h"
 #include <algorithm>
 #include <cassert>
 #include <cstring>
@@ -481,6 +482,19 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
                                                 bool FoundNonSkipPortion,
                                                 bool FoundElse,
                                                 SourceLocation ElseLoc) {
+  // In SkippingRangeStateTy we are depending on SkipExcludedConditionalBlock()
+  // not getting called recursively by storing the RecordedSkippedRanges
+  // DenseMap lookup pointer (field SkipRangePtr). SkippingRangeStateTy expects
+  // that RecordedSkippedRanges won't get modified and SkipRangePtr won't be
+  // invalidated. If this changes and there is a need to call
+  // SkipExcludedConditionalBlock() recursively, SkippingRangeStateTy should
+  // change to do a second lookup in endLexPass function instead of reusing the
+  // lookup pointer.
+  assert(!SkippingExcludedConditionalBlock &&
+         "calling SkipExcludedConditionalBlock recursively");
+  llvm::SaveAndRestore<bool> SARSkipping(SkippingExcludedConditionalBlock,
+                                         true);
+
   ++NumSkipped;
   assert(!CurTokenLexer && CurPPLexer && "Lexing a macro, not a file?");
 
@@ -495,10 +509,53 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
   CurPPLexer->LexingRawMode = true;
   Token Tok;
   SourceLocation endLoc;
+
+  /// Keeps track and caches skipped ranges and also retrieves a prior skipped
+  /// range if the same block is re-visited.
+  struct SkippingRangeStateTy {
+    Preprocessor &PP;
+
+    const char *BeginPtr = nullptr;
+    unsigned *SkipRangePtr = nullptr;
+
+    SkippingRangeStateTy(Preprocessor &PP) : PP(PP) {}
+
+    void beginLexPass() {
+      if (BeginPtr)
+        return; // continue skipping a block.
+
+      // Initiate a skipping block and adjust the lexer if we already skipped it
+      // before.
+      BeginPtr = PP.CurLexer->getBufferLocation();
+      SkipRangePtr = &PP.RecordedSkippedRanges[BeginPtr];
+      if (*SkipRangePtr) {
+        PP.CurLexer->seek(PP.CurLexer->getCurrentBufferOffset() + *SkipRangePtr,
+                          /*IsAtStartOfLine*/ true);
+      }
+    }
+
+    void endLexPass(const char *Hashptr) {
+      if (!BeginPtr) {
+        // Not doing normal lexing.
+        assert(PP.CurLexer->isDependencyDirectivesLexer());
+        return;
+      }
+
+      // Finished skipping a block, record the range if it's first time visited.
+      if (!*SkipRangePtr) {
+        *SkipRangePtr = Hashptr - BeginPtr;
+      }
+      assert(*SkipRangePtr == Hashptr - BeginPtr);
+      BeginPtr = nullptr;
+      SkipRangePtr = nullptr;
+    }
+  } SkippingRangeState(*this);
+
   while (true) {
     if (CurLexer->isDependencyDirectivesLexer()) {
       CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
     } else {
+      SkippingRangeState.beginLexPass();
       while (true) {
         CurLexer->Lex(Tok);
 
@@ -537,6 +594,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
     CurPPLexer->ParsingPreprocessorDirective = true;
     if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
 
+    assert(Tok.is(tok::hash));
+    const char *Hashptr = CurLexer->getBufferLocation() - Tok.getLength();
+    assert(CurLexer->getSourceLocation(Hashptr) == Tok.getLocation());
 
     // Read the next token, the directive flavor.
     LexUnexpandedToken(Tok);
@@ -611,6 +671,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
 
         // If we popped the outermost skipping block, we're done skipping!
         if (!CondInfo.WasSkipping) {
+          SkippingRangeState.endLexPass(Hashptr);
           // Restore the value of LexingRawMode so that trailing comments
           // are handled correctly, if we've reached the outermost block.
           CurPPLexer->LexingRawMode = false;
@@ -628,6 +689,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
         // as a non-skipping conditional.
         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
 
+        if (!CondInfo.WasSkipping)
+          SkippingRangeState.endLexPass(Hashptr);
+
         // If this is a #else with a #else before it, report the error.
         if (CondInfo.FoundElse)
           Diag(Tok, diag::pp_err_else_after_else);
@@ -653,6 +717,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
       } else if (Sub == "lif") {  // "elif".
         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
 
+        if (!CondInfo.WasSkipping)
+          SkippingRangeState.endLexPass(Hashptr);
+
         // If this is a #elif with a #else before it, report the error.
         if (CondInfo.FoundElse)
           Diag(Tok, diag::pp_err_elif_after_else) << PED_Elif;
@@ -695,6 +762,9 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
         PPConditionalInfo &CondInfo = CurPPLexer->peekConditionalLevel();
         Token DirectiveToken = Tok;
 
+        if (!CondInfo.WasSkipping)
+          SkippingRangeState.endLexPass(Hashptr);
+
         // Warn if using `#elifdef` & `#elifndef` in not C2x & C++2b mode even
         // if this branch is in a skipping block.
         unsigned DiagID;


        


More information about the cfe-commits mailing list