[clang] 3f05192 - Revert "[Preprocessor] Reduce the memory overhead of `#define` directives"

Alex Lorenz via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 11 15:54:26 PST 2022


Author: Alex Lorenz
Date: 2022-02-11T15:53:16-08:00
New Revision: 3f05192c4c40bc79b1db431a7a36baaa9491eebb

URL: https://github.com/llvm/llvm-project/commit/3f05192c4c40bc79b1db431a7a36baaa9491eebb
DIFF: https://github.com/llvm/llvm-project/commit/3f05192c4c40bc79b1db431a7a36baaa9491eebb.diff

LOG: Revert "[Preprocessor] Reduce the memory overhead of `#define` directives"

This reverts commit 0d9b91524ea4db3760791bba15773c386a26d8ec.

This change broke LLDB's build. I will need to recommit after fixing LLDB.

Added: 
    

Modified: 
    clang/include/clang/Lex/MacroInfo.h
    clang/lib/Lex/MacroInfo.cpp
    clang/lib/Lex/PPDirectives.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/unittests/Lex/CMakeLists.txt

Removed: 
    clang/unittests/Lex/PPMemoryAllocationsTest.cpp


################################################################################
diff  --git a/clang/include/clang/Lex/MacroInfo.h b/clang/include/clang/Lex/MacroInfo.h
index 1947bc8fc509e..0347a7a37186b 100644
--- a/clang/include/clang/Lex/MacroInfo.h
+++ b/clang/include/clang/Lex/MacroInfo.h
@@ -54,14 +54,11 @@ class MacroInfo {
   /// macro, this includes the \c __VA_ARGS__ identifier on the list.
   IdentifierInfo **ParameterList = nullptr;
 
-  /// This is the list of tokens that the macro is defined to.
-  const Token *ReplacementTokens = nullptr;
-
   /// \see ParameterList
   unsigned NumParameters = 0;
 
-  /// \see ReplacementTokens
-  unsigned NumReplacementTokens = 0;
+  /// This is the list of tokens that the macro is defined to.
+  SmallVector<Token, 8> ReplacementTokens;
 
   /// Length in characters of the macro definition.
   mutable unsigned DefinitionLength;
@@ -233,47 +230,26 @@ class MacroInfo {
   bool isWarnIfUnused() const { return IsWarnIfUnused; }
 
   /// Return the number of tokens that this macro expands to.
-  unsigned getNumTokens() const { return NumReplacementTokens; }
+  unsigned getNumTokens() const { return ReplacementTokens.size(); }
 
   const Token &getReplacementToken(unsigned Tok) const {
-    assert(Tok < NumReplacementTokens && "Invalid token #");
+    assert(Tok < ReplacementTokens.size() && "Invalid token #");
     return ReplacementTokens[Tok];
   }
 
-  using const_tokens_iterator = const Token *;
+  using tokens_iterator = SmallVectorImpl<Token>::const_iterator;
 
-  const_tokens_iterator tokens_begin() const { return ReplacementTokens; }
-  const_tokens_iterator tokens_end() const {
-    return ReplacementTokens + NumReplacementTokens;
-  }
-  bool tokens_empty() const { return NumReplacementTokens == 0; }
-  ArrayRef<Token> tokens() const {
-    return llvm::makeArrayRef(ReplacementTokens, NumReplacementTokens);
-  }
+  tokens_iterator tokens_begin() const { return ReplacementTokens.begin(); }
+  tokens_iterator tokens_end() const { return ReplacementTokens.end(); }
+  bool tokens_empty() const { return ReplacementTokens.empty(); }
+  ArrayRef<Token> tokens() const { return ReplacementTokens; }
 
-  llvm::MutableArrayRef<Token>
-  allocateTokens(unsigned NumTokens, llvm::BumpPtrAllocator &PPAllocator) {
-    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
-           "Token list already allocated!");
-    NumReplacementTokens = NumTokens;
-    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(NumTokens);
-    ReplacementTokens = NewReplacementTokens;
-    return llvm::makeMutableArrayRef(NewReplacementTokens, NumTokens);
-  }
-
-  void setTokens(ArrayRef<Token> Tokens, llvm::BumpPtrAllocator &PPAllocator) {
+  /// Add the specified token to the replacement text for the macro.
+  void AddTokenToBody(const Token &Tok) {
     assert(
         !IsDefinitionLengthCached &&
         "Changing replacement tokens after definition length got calculated");
-    assert(ReplacementTokens == nullptr && NumReplacementTokens == 0 &&
-           "Token list already set!");
-    if (Tokens.empty())
-      return;
-
-    NumReplacementTokens = Tokens.size();
-    Token *NewReplacementTokens = PPAllocator.Allocate<Token>(Tokens.size());
-    std::copy(Tokens.begin(), Tokens.end(), NewReplacementTokens);
-    ReplacementTokens = NewReplacementTokens;
+    ReplacementTokens.push_back(Tok);
   }
 
   /// Return true if this macro is enabled.

diff  --git a/clang/lib/Lex/MacroInfo.cpp b/clang/lib/Lex/MacroInfo.cpp
index f5702130d1819..1ccd140364aeb 100644
--- a/clang/lib/Lex/MacroInfo.cpp
+++ b/clang/lib/Lex/MacroInfo.cpp
@@ -28,25 +28,6 @@
 
 using namespace clang;
 
-namespace {
-
-// MacroInfo is expected to take 40 bytes on platforms with an 8 byte pointer
-// and 4 byte SourceLocation.
-template <int> class MacroInfoSizeChecker {
-public:
-  constexpr static bool AsExpected = true;
-};
-template <> class MacroInfoSizeChecker<8> {
-public:
-  constexpr static bool AsExpected =
-      sizeof(MacroInfo) == (32 + sizeof(SourceLocation) * 2);
-};
-
-static_assert(MacroInfoSizeChecker<sizeof(void *)>::AsExpected,
-              "Unexpected size of MacroInfo");
-
-} // end namespace
-
 MacroInfo::MacroInfo(SourceLocation DefLoc)
     : Location(DefLoc), IsDefinitionLengthCached(false), IsFunctionLike(false),
       IsC99Varargs(false), IsGNUVarargs(false), IsBuiltinMacro(false),
@@ -58,7 +39,6 @@ unsigned MacroInfo::getDefinitionLengthSlow(const SourceManager &SM) const {
   assert(!IsDefinitionLengthCached);
   IsDefinitionLengthCached = true;
 
-  ArrayRef<Token> ReplacementTokens = tokens();
   if (ReplacementTokens.empty())
     return (DefinitionLength = 0);
 
@@ -96,7 +76,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
   bool Lexically = !Syntactically;
 
   // Check # tokens in replacement, number of args, and various flags all match.
-  if (getNumTokens() != Other.getNumTokens() ||
+  if (ReplacementTokens.size() != Other.ReplacementTokens.size() ||
       getNumParams() != Other.getNumParams() ||
       isFunctionLike() != Other.isFunctionLike() ||
       isC99Varargs() != Other.isC99Varargs() ||
@@ -112,7 +92,7 @@ bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP,
   }
 
   // Check all the tokens.
-  for (unsigned i = 0; i != NumReplacementTokens; ++i) {
+  for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) {
     const Token &A = ReplacementTokens[i];
     const Token &B = Other.ReplacementTokens[i];
     if (A.getKind() != B.getKind())
@@ -177,7 +157,7 @@ LLVM_DUMP_METHOD void MacroInfo::dump() const {
   }
 
   bool First = true;
-  for (const Token &Tok : tokens()) {
+  for (const Token &Tok : ReplacementTokens) {
     // Leading space is semantically meaningful in a macro definition,
     // so preserve it in the dump output.
     if (First || Tok.hasLeadingSpace())

diff  --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index ae5e4fc75c157..f3aefdd22b514 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2711,14 +2711,12 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
   if (!Tok.is(tok::eod))
     LastTok = Tok;
 
-  SmallVector<Token, 16> Tokens;
-
   // Read the rest of the macro body.
   if (MI->isObjectLike()) {
     // Object-like macros are very simple, just read their body.
     while (Tok.isNot(tok::eod)) {
       LastTok = Tok;
-      Tokens.push_back(Tok);
+      MI->AddTokenToBody(Tok);
       // Get the next token of the macro.
       LexUnexpandedToken(Tok);
     }
@@ -2733,7 +2731,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
       LastTok = Tok;
 
       if (!Tok.isOneOf(tok::hash, tok::hashat, tok::hashhash)) {
-        Tokens.push_back(Tok);
+        MI->AddTokenToBody(Tok);
 
         if (VAOCtx.isVAOptToken(Tok)) {
           // If we're already within a VAOPT, emit an error.
@@ -2747,7 +2745,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
             Diag(Tok, diag::err_pp_missing_lparen_in_vaopt_use);
             return nullptr;
           }
-          Tokens.push_back(Tok);
+          MI->AddTokenToBody(Tok);
           VAOCtx.sawVAOptFollowedByOpeningParens(Tok.getLocation());
           LexUnexpandedToken(Tok);
           if (Tok.is(tok::hashhash)) {
@@ -2758,10 +2756,10 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
         } else if (VAOCtx.isInVAOpt()) {
           if (Tok.is(tok::r_paren)) {
             if (VAOCtx.sawClosingParen()) {
-              assert(Tokens.size() >= 3 &&
-                     "Must have seen at least __VA_OPT__( "
-                     "and a subsequent tok::r_paren");
-              if (Tokens[Tokens.size() - 2].is(tok::hashhash)) {
+              const unsigned NumTokens = MI->getNumTokens();
+              assert(NumTokens >= 3 && "Must have seen at least __VA_OPT__( "
+                                       "and a subsequent tok::r_paren");
+              if (MI->getReplacementToken(NumTokens - 2).is(tok::hashhash)) {
                 Diag(Tok, diag::err_vaopt_paste_at_end);
                 return nullptr;
               }
@@ -2780,7 +2778,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
       // things.
       if (getLangOpts().TraditionalCPP) {
         Tok.setKind(tok::unknown);
-        Tokens.push_back(Tok);
+        MI->AddTokenToBody(Tok);
 
         // Get the next token of the macro.
         LexUnexpandedToken(Tok);
@@ -2796,16 +2794,17 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
         LexUnexpandedToken(Tok);
 
         if (Tok.is(tok::eod)) {
-          Tokens.push_back(LastTok);
+          MI->AddTokenToBody(LastTok);
           break;
         }
 
-        if (!Tokens.empty() && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
-            Tokens[Tokens.size() - 1].is(tok::comma))
+        unsigned NumTokens = MI->getNumTokens();
+        if (NumTokens && Tok.getIdentifierInfo() == Ident__VA_ARGS__ &&
+            MI->getReplacementToken(NumTokens-1).is(tok::comma))
           MI->setHasCommaPasting();
 
         // Things look ok, add the '##' token to the macro.
-        Tokens.push_back(LastTok);
+        MI->AddTokenToBody(LastTok);
         continue;
       }
 
@@ -2824,7 +2823,7 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
         // confused.
         if (getLangOpts().AsmPreprocessor && Tok.isNot(tok::eod)) {
           LastTok.setKind(tok::unknown);
-          Tokens.push_back(LastTok);
+          MI->AddTokenToBody(LastTok);
           continue;
         } else {
           Diag(Tok, diag::err_pp_stringize_not_parameter)
@@ -2834,13 +2833,13 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
       }
 
       // Things look ok, add the '#' and param name tokens to the macro.
-      Tokens.push_back(LastTok);
+      MI->AddTokenToBody(LastTok);
 
       // If the token following '#' is VAOPT, let the next iteration handle it
       // and check it for correctness, otherwise add the token and prime the
       // loop with the next one.
       if (!VAOCtx.isVAOptToken(Tok)) {
-        Tokens.push_back(Tok);
+        MI->AddTokenToBody(Tok);
         LastTok = Tok;
 
         // Get the next token of the macro.
@@ -2856,8 +2855,6 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
     }
   }
   MI->setDefinitionEndLoc(LastTok.getLocation());
-
-  MI->setTokens(Tokens, BP);
   return MI;
 }
 /// HandleDefineDirective - Implements \#define.  This consumes the entire macro
@@ -3010,7 +3007,7 @@ void Preprocessor::HandleDefineDirective(
     Tok.startToken();
     Tok.setKind(tok::kw__Static_assert);
     Tok.setIdentifierInfo(getIdentifierInfo("_Static_assert"));
-    MI->setTokens({Tok}, BP);
+    MI->AddTokenToBody(Tok);
     (void)appendDefMacroDirective(getIdentifierInfo("static_assert"), MI);
   }
 }

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 5a09d802b2d8a..0ea904b275f25 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1694,7 +1694,6 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
   RecordData Record;
   SmallVector<IdentifierInfo*, 16> MacroParams;
   MacroInfo *Macro = nullptr;
-  llvm::MutableArrayRef<Token> MacroTokens;
 
   while (true) {
     // Advance to the next record, but if we get to the end of the block, don't
@@ -1749,8 +1748,7 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
       MI->setDefinitionEndLoc(ReadSourceLocation(F, Record, NextIndex));
       MI->setIsUsed(Record[NextIndex++]);
       MI->setUsedForHeaderGuard(Record[NextIndex++]);
-      MacroTokens = MI->allocateTokens(Record[NextIndex++],
-                                       PP.getPreprocessorAllocator());
+
       if (RecType == PP_MACRO_FUNCTION_LIKE) {
         // Decode function-like macro info.
         bool isC99VarArgs = Record[NextIndex++];
@@ -1795,14 +1793,10 @@ MacroInfo *ASTReader::ReadMacroRecord(ModuleFile &F, uint64_t Offset) {
       // If we see a TOKEN before a PP_MACRO_*, then the file is
       // erroneous, just pretend we didn't see this.
       if (!Macro) break;
-      if (MacroTokens.empty()) {
-        Error("unexpected number of macro tokens for a macro in AST file");
-        return Macro;
-      }
 
       unsigned Idx = 0;
-      MacroTokens[0] = ReadToken(F, Record, Idx);
-      MacroTokens = MacroTokens.drop_front();
+      Token Tok = ReadToken(F, Record, Idx);
+      Macro->AddTokenToBody(Tok);
       break;
     }
     }

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a126e12bcbd99..763fc9537c04b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2431,7 +2431,6 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
     AddSourceLocation(MI->getDefinitionEndLoc(), Record);
     Record.push_back(MI->isUsed());
     Record.push_back(MI->isUsedForHeaderGuard());
-    Record.push_back(MI->getNumTokens());
     unsigned Code;
     if (MI->isObjectLike()) {
       Code = PP_MACRO_OBJECT_LIKE;

diff  --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt
index 99024ba896ac8..97a4e5e44608c 100644
--- a/clang/unittests/Lex/CMakeLists.txt
+++ b/clang/unittests/Lex/CMakeLists.txt
@@ -9,7 +9,6 @@ add_clang_unittest(LexTests
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
-  PPMemoryAllocationsTest.cpp
   )
 
 clang_target_link_libraries(LexTests

diff  --git a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
deleted file mode 100644
index 894d94e149a9f..0000000000000
--- a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===- unittests/Lex/PPMemoryAllocationsTest.cpp - ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===--------------------------------------------------------------===//
-
-#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/DiagnosticOptions.h"
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/TargetInfo.h"
-#include "clang/Basic/TargetOptions.h"
-#include "clang/Lex/HeaderSearch.h"
-#include "clang/Lex/HeaderSearchOptions.h"
-#include "clang/Lex/ModuleLoader.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/PreprocessorOptions.h"
-#include "gtest/gtest.h"
-
-using namespace clang;
-
-namespace {
-
-class PPMemoryAllocationsTest : public ::testing::Test {
-protected:
-  PPMemoryAllocationsTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
-        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
-    TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
-    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
-  }
-
-  FileSystemOptions FileMgrOpts;
-  FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
-  DiagnosticsEngine Diags;
-  SourceManager SourceMgr;
-  LangOptions LangOpts;
-  std::shared_ptr<TargetOptions> TargetOpts;
-  IntrusiveRefCntPtr<TargetInfo> Target;
-};
-
-TEST_F(PPMemoryAllocationsTest, PPMacroDefinesAllocations) {
-  std::string Source;
-  size_t NumMacros = 1000000;
-  {
-    llvm::raw_string_ostream SourceOS(Source);
-
-    // Create a combination of 1 or 3 token macros.
-    for (size_t I = 0; I < NumMacros; ++I) {
-      SourceOS << "#define MACRO_ID_" << I << " ";
-      if ((I % 2) == 0)
-        SourceOS << "(" << I << ")";
-      else
-        SourceOS << I;
-      SourceOS << "\n";
-    }
-  }
-
-  std::unique_ptr<llvm::MemoryBuffer> Buf =
-      llvm::MemoryBuffer::getMemBuffer(Source);
-  SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
-
-  TrivialModuleLoader ModLoader;
-  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
-                          Diags, LangOpts, Target.get());
-  Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
-                  SourceMgr, HeaderInfo, ModLoader,
-                  /*IILookup =*/nullptr,
-                  /*OwnsHeaderSearch =*/false);
-  PP.Initialize(*Target);
-  PP.EnterMainSourceFile();
-
-  while (1) {
-    Token tok;
-    PP.Lex(tok);
-    if (tok.is(tok::eof))
-      break;
-  }
-
-  size_t NumAllocated = PP.getPreprocessorAllocator().getBytesAllocated();
-  float BytesPerDefine = float(NumAllocated) / float(NumMacros);
-  llvm::errs() << "Num preprocessor allocations for " << NumMacros
-               << " #define: " << NumAllocated << "\n";
-  llvm::errs() << "Bytes per #define: " << BytesPerDefine << "\n";
-  // On arm64-apple-macos, we get around 120 bytes per define.
-  // Assume a reasonable upper bound based on that number that we don't want
-  // to exceed when storing information about a macro #define with 1 or 3
-  // tokens.
-  EXPECT_LT(BytesPerDefine, 130.0f);
-}
-
-} // anonymous namespace


        


More information about the cfe-commits mailing list