[clang] 11c1d8b - [Lex] Bring back the magic number 50 in updateConsecutiveMacroArgTokens.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 26 03:07:26 PDT 2022


Author: Haojian Wu
Date: 2022-10-26T12:03:21+02:00
New Revision: 11c1d8b7fd82b32b37db47bcd8eac813b9667b5c

URL: https://github.com/llvm/llvm-project/commit/11c1d8b7fd82b32b37db47bcd8eac813b9667b5c
DIFF: https://github.com/llvm/llvm-project/commit/11c1d8b7fd82b32b37db47bcd8eac813b9667b5c.diff

LOG: [Lex] Bring back the magic number 50 in updateConsecutiveMacroArgTokens.

This patch is a reland of 74e4f778cf16cbf7163b5c6de6027a43f5e9169f and
f83347b0bedb22ea676861c8e4e2ed9c31371ade with the removed 50 trick back.

The magic number 50 was removed in D134942, as a behavior change for
performance reason.

While it reduces the number of SLocEntry, it increases the usage of
SourceLocation address space usage, which is critical for compiling
large TU.

This fixes a regression caused in D134942 -- clang failed to compile one of
our internal files, complaining the file is too large to process because clang
runs out of source location space (we spend 40% more address space!)

Differential Revision: https://reviews.llvm.org/D136539

Added: 
    clang/test/Lexer/update_consecutive_macro_address_space.c

Modified: 
    clang/lib/Lex/TokenLexer.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index efda6d0046fa1..0d141c662b95f 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -25,6 +25,7 @@
 #include "clang/Lex/Token.h"
 #include "clang/Lex/VariadicMacroSupport.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
@@ -984,65 +985,71 @@ TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
 /// \arg begin_tokens will be updated to a position past all the found
 /// consecutive tokens.
 static void updateConsecutiveMacroArgTokens(SourceManager &SM,
-                                            SourceLocation InstLoc,
+                                            SourceLocation ExpandLoc,
                                             Token *&begin_tokens,
                                             Token * end_tokens) {
-  assert(begin_tokens < end_tokens);
-
-  SourceLocation FirstLoc = begin_tokens->getLocation();
-  SourceLocation CurLoc = FirstLoc;
-
-  // Compare the source location offset of tokens and group together tokens that
-  // are close, even if their locations point to 
diff erent FileIDs. e.g.
-  //
-  //  |bar    |  foo | cake   |  (3 tokens from 3 consecutive FileIDs)
-  //  ^                    ^
-  //  |bar       foo   cake|     (one SLocEntry chunk for all tokens)
-  //
-  // we can perform this "merge" since the token's spelling location depends
-  // on the relative offset.
-
-  Token *NextTok = begin_tokens + 1;
-  for (; NextTok < end_tokens; ++NextTok) {
-    SourceLocation NextLoc = NextTok->getLocation();
-    if (CurLoc.isFileID() != NextLoc.isFileID())
-      break; // Token from 
diff erent kind of FileID.
-
-    SourceLocation::IntTy RelOffs;
-    if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
-      break; // Token from 
diff erent local/loaded location.
-    // Check that token is not before the previous token or more than 50
-    // "characters" away.
-    if (RelOffs < 0 || RelOffs > 50)
-      break;
-
-    if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc))
-      break; // Token from a 
diff erent macro.
-
-    CurLoc = NextLoc;
+  assert(begin_tokens + 1 < end_tokens);
+  SourceLocation BeginLoc = begin_tokens->getLocation();
+  llvm::MutableArrayRef<Token> All(begin_tokens, end_tokens);
+  llvm::MutableArrayRef<Token> Partition;
+
+  auto NearLast = [&, Last = BeginLoc](SourceLocation Loc) mutable {
+    // The maximum distance between two consecutive tokens in a partition.
+    // This is an important trick to avoid using too much SourceLocation address
+    // space!
+    static constexpr SourceLocation::IntTy MaxDistance = 50;
+    auto Distance = Loc.getRawEncoding() - Last.getRawEncoding();
+    Last = Loc;
+    return Distance <= MaxDistance;
+  };
+
+  // Partition the tokens by their FileID.
+  // This is a hot function, and calling getFileID can be expensive, the
+  // implementation is optimized by reducing the number of getFileID.
+  if (BeginLoc.isFileID()) {
+    // Consecutive tokens not written in macros must be from the same file.
+    // (Neither #include nor eof can occur inside a macro argument.)
+    Partition = All.take_while([&](const Token &T) {
+      return T.getLocation().isFileID() && NearLast(T.getLocation());
+    });
+  } else {
+    // Call getFileID once to calculate the bounds, and use the cheaper
+    // sourcelocation-against-bounds comparison.
+    FileID BeginFID = SM.getFileID(BeginLoc);
+    SourceLocation Limit =
+        SM.getComposedLoc(BeginFID, SM.getFileIDSize(BeginFID));
+    Partition = All.take_while([&](const Token &T) {
+      return T.getLocation() >= BeginLoc && T.getLocation() < Limit &&
+             NearLast(T.getLocation());
+    });
   }
+  assert(!Partition.empty());
 
   // For the consecutive tokens, find the length of the SLocEntry to contain
   // all of them.
-  Token &LastConsecutiveTok = *(NextTok-1);
-  SourceLocation::IntTy LastRelOffs = 0;
-  SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
-                           &LastRelOffs);
   SourceLocation::UIntTy FullLength =
-      LastRelOffs + LastConsecutiveTok.getLength();
-
+      Partition.back().getEndLoc().getRawEncoding() -
+      Partition.front().getLocation().getRawEncoding();
   // Create a macro expansion SLocEntry that will "contain" all of the tokens.
   SourceLocation Expansion =
-      SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
-
+      SM.createMacroArgExpansionLoc(BeginLoc, ExpandLoc, FullLength);
+
+#ifdef EXPENSIVE_CHECKS
+  assert(llvm::all_of(Partition.drop_front(),
+                      [&SM, ID = SM.getFileID(Partition.front().getLocation())](
+                          const Token &T) {
+                        return ID == SM.getFileID(T.getLocation());
+                      }) &&
+         "Must have the same FIleID!");
+#endif
   // Change the location of the tokens from the spelling location to the new
   // expanded location.
-  for (; begin_tokens < NextTok; ++begin_tokens) {
-    Token &Tok = *begin_tokens;
-    SourceLocation::IntTy RelOffs = 0;
-    SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
-    Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
+  for (Token& T : Partition) {
+    SourceLocation::IntTy RelativeOffset =
+        T.getLocation().getRawEncoding() - BeginLoc.getRawEncoding();
+    T.setLocation(Expansion.getLocWithOffset(RelativeOffset));
   }
+  begin_tokens = &Partition.back() + 1;
 }
 
 /// Creates SLocEntries and updates the locations of macro argument
@@ -1055,7 +1062,7 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
                                             Token *end_tokens) {
   SourceManager &SM = PP.getSourceManager();
 
-  SourceLocation InstLoc =
+  SourceLocation ExpandLoc =
       getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
 
   while (begin_tokens < end_tokens) {
@@ -1063,12 +1070,12 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
     if (end_tokens - begin_tokens == 1) {
       Token &Tok = *begin_tokens;
       Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
-                                                    InstLoc,
+                                                    ExpandLoc,
                                                     Tok.getLength()));
       return;
     }
 
-    updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
+    updateConsecutiveMacroArgTokens(SM, ExpandLoc, begin_tokens, end_tokens);
   }
 }
 

diff  --git a/clang/test/Lexer/update_consecutive_macro_address_space.c b/clang/test/Lexer/update_consecutive_macro_address_space.c
new file mode 100644
index 0000000000000..cf3c406112b25
--- /dev/null
+++ b/clang/test/Lexer/update_consecutive_macro_address_space.c
@@ -0,0 +1,36 @@
+// RUN: %clang -cc1 -print-stats %s 2>&1 | FileCheck %s
+// CHECK: 6 local SLocEntry's allocated
+//
+// Verify that the macro arg expansion is split to two file ids, we have 6 file
+// ids rather than 5:
+//   0: invalid file id
+//   1: main file
+//   2: builtin file
+//   3: macro expansion for X
+//   4: macro arg expansions for 1
+//   5: macro arg expansions for == 2
+#define X(x) (int)(x);
+void func() {
+  X(1
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+/*************************************************************************************************/
+== 2);
+}


        


More information about the cfe-commits mailing list