[clang-tools-extra] 5d2d527 - [clangd] Avoid scanning up to end of file on each comment!

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 6 02:39:07 PDT 2022


Author: Sam McCall
Date: 2022-10-06T11:38:55+02:00
New Revision: 5d2d527c32da2081b814ef8b446bc3e037f74b0a

URL: https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a
DIFF: https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a.diff

LOG: [clangd] Avoid scanning up to end of file on each comment!

Assigning char* (pointing at comment start) to StringRef was causing us
to scan the rest of the source file looking for the null terminator.

This seems to be eating about 8% of our *total* CPU!

While fixing this, factor out the common bits from the two places we're
parsing IWYU pragmas.

Differential Revision: https://reviews.llvm.org/D135314

Added: 
    

Modified: 
    clang-tools-extra/clangd/Headers.cpp
    clang-tools-extra/clangd/Headers.h
    clang-tools-extra/clangd/index/CanonicalIncludes.cpp
    clang-tools-extra/clangd/unittests/HeadersTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
index 5231a47487bc7..52b954e921620 100644
--- a/clang-tools-extra/clangd/Headers.cpp
+++ b/clang-tools-extra/clangd/Headers.cpp
@@ -22,9 +22,17 @@
 namespace clang {
 namespace clangd {
 
-const char IWYUPragmaKeep[] = "// IWYU pragma: keep";
-const char IWYUPragmaExport[] = "// IWYU pragma: export";
-const char IWYUPragmaBeginExports[] = "// IWYU pragma: begin_exports";
+llvm::Optional<StringRef> parseIWYUPragma(const char *Text) {
+  // This gets called for every comment seen in the preamble, so it's quite hot.
+  constexpr llvm::StringLiteral IWYUPragma = "// IWYU pragma: ";
+  if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size()))
+    return llvm::None;
+  Text += IWYUPragma.size();
+  const char *End = Text;
+  while (*End != 0 && *End != '\n')
+    ++End;
+  return StringRef(Text, End - Text);
+}
 
 class IncludeStructure::RecordHeaders : public PPCallbacks,
                                         public CommentHandler {
@@ -129,10 +137,10 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
   }
 
   bool HandleComment(Preprocessor &PP, SourceRange Range) override {
-    bool Err = false;
-    llvm::StringRef Text = SM.getCharacterData(Range.getBegin(), &Err);
-    if (Err)
+    auto Pragma = parseIWYUPragma(SM.getCharacterData(Range.getBegin()));
+    if (!Pragma)
       return false;
+
     if (inMainFile()) {
       // Given:
       //
@@ -150,8 +158,7 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
       // will know that the next inclusion is behind the IWYU pragma.
       // FIXME: Support "IWYU pragma: begin_exports" and "IWYU pragma:
       // end_exports".
-      if (!Text.startswith(IWYUPragmaExport) &&
-          !Text.startswith(IWYUPragmaKeep))
+      if (!Pragma->startswith("export") && !Pragma->startswith("keep"))
         return false;
       unsigned Offset = SM.getFileOffset(Range.getBegin());
       LastPragmaKeepInMainFileLine =
@@ -161,8 +168,7 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
       // does not support them properly yet, so they will be not marked as
       // unused.
       // FIXME: Once IncludeCleaner supports export pragmas, remove this.
-      if (!Text.startswith(IWYUPragmaExport) &&
-          !Text.startswith(IWYUPragmaBeginExports))
+      if (!Pragma->startswith("export") && !Pragma->startswith("begin_exports"))
         return false;
       Out->HasIWYUExport.insert(
           *Out->getID(SM.getFileEntryForID(SM.getFileID(Range.getBegin()))));

diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index ff3f063168325..ba72ad397bf8f 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -35,6 +35,12 @@ namespace clangd {
 /// Returns true if \p Include is literal include like "path" or <path>.
 bool isLiteralInclude(llvm::StringRef Include);
 
+/// If Text begins an Include-What-You-Use directive, returns it.
+/// Given "// IWYU pragma: keep", returns "keep".
+/// Input is a null-terminated char* as provided by SM.getCharacterData().
+/// (This should not be StringRef as we do *not* want to scan for its length).
+llvm::Optional<StringRef> parseIWYUPragma(const char *Text);
+
 /// Represents a header file to be #include'd.
 struct HeaderFile {
   std::string File;

diff  --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
index b0ae42a96ecf4..8568079ca1adb 100644
--- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -17,8 +17,6 @@
 namespace clang {
 namespace clangd {
 namespace {
-const char IWYUPragma[] = "// IWYU pragma: private, include ";
-
 const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = {
     {"include/__stddef_max_align_t.h", "<cstddef>"},
     {"include/__wmmintrin_aes.h", "<wmmintrin.h>"},
@@ -712,17 +710,17 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) {
     PragmaCommentHandler(CanonicalIncludes *Includes) : Includes(Includes) {}
 
     bool HandleComment(Preprocessor &PP, SourceRange Range) override {
-      llvm::StringRef Text =
-          Lexer::getSourceText(CharSourceRange::getCharRange(Range),
-                               PP.getSourceManager(), PP.getLangOpts());
-      if (!Text.consume_front(IWYUPragma))
+      auto Pragma = parseIWYUPragma(
+          PP.getSourceManager().getCharacterData(Range.getBegin()));
+      if (!Pragma || !Pragma->consume_front("private, include "))
         return false;
       auto &SM = PP.getSourceManager();
       // We always insert using the spelling from the pragma.
       if (auto *FE = SM.getFileEntryForID(SM.getFileID(Range.getBegin())))
-        Includes->addMapping(
-            FE->getLastRef(),
-            isLiteralInclude(Text) ? Text.str() : ("\"" + Text + "\"").str());
+        Includes->addMapping(FE->getLastRef(),
+                             isLiteralInclude(*Pragma)
+                                 ? Pragma->str()
+                                 : ("\"" + *Pragma + "\"").str());
       return false;
     }
 

diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
index 32e4aea15490b..324d4b58a1ef1 100644
--- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -9,6 +9,7 @@
 #include "Headers.h"
 
 #include "Compiler.h"
+#include "Matchers.h"
 #include "TestFS.h"
 #include "TestTU.h"
 #include "clang/Basic/TokenKinds.h"
@@ -30,6 +31,7 @@ namespace {
 using ::testing::AllOf;
 using ::testing::Contains;
 using ::testing::ElementsAre;
+using ::testing::Eq;
 using ::testing::IsEmpty;
 using ::testing::Not;
 using ::testing::UnorderedElementsAre;
@@ -445,6 +447,18 @@ TEST_F(HeadersTest, HasIWYUPragmas) {
   EXPECT_FALSE(Includes.hasIWYUExport(getID("none.h", Includes)));
 }
 
+TEST(Headers, ParseIWYUPragma) {
+  EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep"), HasValue(Eq("keep")));
+  EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep\netc"),
+              HasValue(Eq("keep")));
+  EXPECT_EQ(parseIWYUPragma("/* IWYU pragma: keep"), llvm::None)
+      << "Only // comments supported!";
+  EXPECT_EQ(parseIWYUPragma("//  IWYU pragma: keep"), llvm::None)
+      << "Sensitive to whitespace";
+  EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), llvm::None)
+      << "Sensitive to whitespace";
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list