[clang-tools-extra] 5d2d527 - [clangd] Avoid scanning up to end of file on each comment!

David Blaikie via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 10 10:56:56 PDT 2022


Could the underlying API be made more robust to handle StringRefs
rather than null terminated char* in the first place? (like
SourceManager::getCharacterData could return StringRef? Presumably at
some layer it already knows how long the file is - the underlying
MemoryBuffer, in this case)

On Thu, Oct 6, 2022 at 2:39 AM Sam McCall via cfe-commits
<cfe-commits at lists.llvm.org> wrote:
>
>
> Author: Sam McCall
> Date: 2022-10-06T11:38:55+02:00
> New Revision: 5d2d527c32da2081b814ef8b446bc3e037f74b0a
>
> URL: https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a
> DIFF: https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a.diff
>
> LOG: [clangd] Avoid scanning up to end of file on each comment!
>
> Assigning char* (pointing at comment start) to StringRef was causing us
> to scan the rest of the source file looking for the null terminator.
>
> This seems to be eating about 8% of our *total* CPU!
>
> While fixing this, factor out the common bits from the two places we're
> parsing IWYU pragmas.
>
> Differential Revision: https://reviews.llvm.org/D135314
>
> Added:
>
>
> Modified:
>     clang-tools-extra/clangd/Headers.cpp
>     clang-tools-extra/clangd/Headers.h
>     clang-tools-extra/clangd/index/CanonicalIncludes.cpp
>     clang-tools-extra/clangd/unittests/HeadersTests.cpp
>
> Removed:
>
>
>
> ################################################################################
> diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
> index 5231a47487bc7..52b954e921620 100644
> --- a/clang-tools-extra/clangd/Headers.cpp
> +++ b/clang-tools-extra/clangd/Headers.cpp
> @@ -22,9 +22,17 @@
>  namespace clang {
>  namespace clangd {
>
> -const char IWYUPragmaKeep[] = "// IWYU pragma: keep";
> -const char IWYUPragmaExport[] = "// IWYU pragma: export";
> -const char IWYUPragmaBeginExports[] = "// IWYU pragma: begin_exports";
> +llvm::Optional<StringRef> parseIWYUPragma(const char *Text) {
> +  // This gets called for every comment seen in the preamble, so it's quite hot.
> +  constexpr llvm::StringLiteral IWYUPragma = "// IWYU pragma: ";
> +  if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size()))
> +    return llvm::None;
> +  Text += IWYUPragma.size();
> +  const char *End = Text;
> +  while (*End != 0 && *End != '\n')
> +    ++End;
> +  return StringRef(Text, End - Text);
> +}
>
>  class IncludeStructure::RecordHeaders : public PPCallbacks,
>                                          public CommentHandler {
> @@ -129,10 +137,10 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
>    }
>
>    bool HandleComment(Preprocessor &PP, SourceRange Range) override {
> -    bool Err = false;
> -    llvm::StringRef Text = SM.getCharacterData(Range.getBegin(), &Err);
> -    if (Err)
> +    auto Pragma = parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
> +    if (!Pragma)
>        return false;
> +
>      if (inMainFile()) {
>        // Given:
>        //
> @@ -150,8 +158,7 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
>        // will know that the next inclusion is behind the IWYU pragma.
>        // FIXME: Support "IWYU pragma: begin_exports" and "IWYU pragma:
>        // end_exports".
> -      if (!Text.startswith(IWYUPragmaExport) &&
> -          !Text.startswith(IWYUPragmaKeep))
> +      if (!Pragma->startswith("export") && !Pragma->startswith("keep"))
>          return false;
>        unsigned Offset = SM.getFileOffset(Range.getBegin());
>        LastPragmaKeepInMainFileLine =
> @@ -161,8 +168,7 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
>        // does not support them properly yet, so they will be not marked as
>        // unused.
>        // FIXME: Once IncludeCleaner supports export pragmas, remove this.
> -      if (!Text.startswith(IWYUPragmaExport) &&
> -          !Text.startswith(IWYUPragmaBeginExports))
> +      if (!Pragma->startswith("export") && !Pragma->startswith("begin_exports"))
>          return false;
>        Out->HasIWYUExport.insert(
>            *Out->getID(SM.getFileEntryForID(SM.getFileID(Range.getBegin()))));
>
> diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
> index ff3f063168325..ba72ad397bf8f 100644
> --- a/clang-tools-extra/clangd/Headers.h
> +++ b/clang-tools-extra/clangd/Headers.h
> @@ -35,6 +35,12 @@ namespace clangd {
>  /// Returns true if \p Include is literal include like "path" or <path>.
>  bool isLiteralInclude(llvm::StringRef Include);
>
> +/// If Text begins an Include-What-You-Use directive, returns it.
> +/// Given "// IWYU pragma: keep", returns "keep".
> +/// Input is a null-terminated char* as provided by SM.getCharacterData().
> +/// (This should not be StringRef as we do *not* want to scan for its length).
> +llvm::Optional<StringRef> parseIWYUPragma(const char *Text);
> +
>  /// Represents a header file to be #include'd.
>  struct HeaderFile {
>    std::string File;
>
> diff  --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
> index b0ae42a96ecf4..8568079ca1adb 100644
> --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
> +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
> @@ -17,8 +17,6 @@
>  namespace clang {
>  namespace clangd {
>  namespace {
> -const char IWYUPragma[] = "// IWYU pragma: private, include ";
> -
>  const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = {
>      {"include/__stddef_max_align_t.h", "<cstddef>"},
>      {"include/__wmmintrin_aes.h", "<wmmintrin.h>"},
> @@ -712,17 +710,17 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
>      PragmaCommentHandler(CanonicalIncludes *Includes) : Includes(Includes) {}
>
>      bool HandleComment(Preprocessor &PP, SourceRange Range) override {
> -      llvm::StringRef Text =
> -          Lexer::getSourceText(CharSourceRange::getCharRange(Range),
> -                               PP.getSourceManager(), PP.getLangOpts());
> -      if (!Text.consume_front(IWYUPragma))
> +      auto Pragma = parseIWYUPragma(
> +          PP.getSourceManager().getCharacterData(Range.getBegin()));
> +      if (!Pragma || !Pragma->consume_front("private, include "))
>          return false;
>        auto &SM = PP.getSourceManager();
>        // We always insert using the spelling from the pragma.
>        if (auto *FE = SM.getFileEntryForID(SM.getFileID(Range.getBegin())))
> -        Includes->addMapping(
> -            FE->getLastRef(),
> -            isLiteralInclude(Text) ? Text.str() : ("\"" + Text + "\"").str());
> +        Includes->addMapping(FE->getLastRef(),
> +                             isLiteralInclude(*Pragma)
> +                                 ? Pragma->str()
> +                                 : ("\"" + *Pragma + "\"").str());
>        return false;
>      }
>
>
> diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
> index 32e4aea15490b..324d4b58a1ef1 100644
> --- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
> +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
> @@ -9,6 +9,7 @@
>  #include "Headers.h"
>
>  #include "Compiler.h"
> +#include "Matchers.h"
>  #include "TestFS.h"
>  #include "TestTU.h"
>  #include "clang/Basic/TokenKinds.h"
> @@ -30,6 +31,7 @@ namespace {
>  using ::testing::AllOf;
>  using ::testing::Contains;
>  using ::testing::ElementsAre;
> +using ::testing::Eq;
>  using ::testing::IsEmpty;
>  using ::testing::Not;
>  using ::testing::UnorderedElementsAre;
> @@ -445,6 +447,18 @@ TEST_F(HeadersTest, HasIWYUPragmas) {
>    EXPECT_FALSE(Includes.hasIWYUExport(getID("none.h", Includes)));
>  }
>
> +TEST(Headers, ParseIWYUPragma) {
> +  EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep"), HasValue(Eq("keep")));
> +  EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep\netc"),
> +              HasValue(Eq("keep")));
> +  EXPECT_EQ(parseIWYUPragma("/* IWYU pragma: keep"), llvm::None)
> +      << "Only // comments supported!";
> +  EXPECT_EQ(parseIWYUPragma("//  IWYU pragma: keep"), llvm::None)
> +      << "Sensitive to whitespace";
> +  EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), llvm::None)
> +      << "Sensitive to whitespace";
> +}
> +
>  } // namespace
>  } // namespace clangd
>  } // namespace clang
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


More information about the cfe-commits mailing list