[clang] 773ad55 - [index] Fix performance regression with indexing macros

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 16 10:16:39 PDT 2021


Author: Ben Langmuir
Date: 2021-06-16T10:16:26-07:00
New Revision: 773ad55a393f368cc92b1611c52e493ed45a353f

URL: https://github.com/llvm/llvm-project/commit/773ad55a393f368cc92b1611c52e493ed45a353f
DIFF: https://github.com/llvm/llvm-project/commit/773ad55a393f368cc92b1611c52e493ed45a353f.diff

LOG: [index] Fix performance regression with indexing macros

When using FileIndexRecord with macros, symbol references can be seen
out of source order, which was causing a regression to insert the
symbols into a vector. Instead, we now lazily sort the vector. The
impact is small on most code, but in very large files with many macro
references (M) near the beginning of the file followed by many decl
references (D) it was O(M*D). A particularly bad protobuf-generated
header was observed with a 100% regression in practice.

rdar://78628133

Added: 
    

Modified: 
    clang/lib/Index/FileIndexRecord.cpp
    clang/lib/Index/FileIndexRecord.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Index/FileIndexRecord.cpp b/clang/lib/Index/FileIndexRecord.cpp
index 950a99a2304b..d392a2bedeba 100644
--- a/clang/lib/Index/FileIndexRecord.cpp
+++ b/clang/lib/Index/FileIndexRecord.cpp
@@ -17,23 +17,16 @@
 using namespace clang;
 using namespace clang::index;
 
-static void addOccurrence(std::vector<DeclOccurrence> &Decls,
-                          DeclOccurrence Info) {
-  auto IsNextOccurence = [&]() -> bool {
-    if (Decls.empty())
-      return true;
-    auto &Last = Decls.back();
-    return Last.Offset < Info.Offset;
-  };
-
-  if (IsNextOccurence()) {
-    Decls.push_back(std::move(Info));
-    return;
+ArrayRef<DeclOccurrence>
+FileIndexRecord::getDeclOccurrencesSortedByOffset() const {
+  if (!IsSorted) {
+    llvm::stable_sort(Decls,
+                      [](const DeclOccurrence &A, const DeclOccurrence &B) {
+                        return A.Offset < B.Offset;
+                      });
+    IsSorted = true;
   }
-
-  // We keep Decls in order as we need to access them in this order in all cases.
-  auto It = llvm::upper_bound(Decls, Info);
-  Decls.insert(It, std::move(Info));
+  return Decls;
 }
 
 void FileIndexRecord::addDeclOccurence(SymbolRoleSet Roles, unsigned Offset,
@@ -41,13 +34,15 @@ void FileIndexRecord::addDeclOccurence(SymbolRoleSet Roles, unsigned Offset,
                                        ArrayRef<SymbolRelation> Relations) {
   assert(D->isCanonicalDecl() &&
          "Occurrences should be associated with their canonical decl");
-  addOccurrence(Decls, DeclOccurrence(Roles, Offset, D, Relations));
+  IsSorted = false;
+  Decls.emplace_back(Roles, Offset, D, Relations);
 }
 
 void FileIndexRecord::addMacroOccurence(SymbolRoleSet Roles, unsigned Offset,
                                         const IdentifierInfo *Name,
                                         const MacroInfo *MI) {
-  addOccurrence(Decls, DeclOccurrence(Roles, Offset, Name, MI));
+  IsSorted = false;
+  Decls.emplace_back(Roles, Offset, Name, MI);
 }
 
 void FileIndexRecord::removeHeaderGuardMacros() {

diff  --git a/clang/lib/Index/FileIndexRecord.h b/clang/lib/Index/FileIndexRecord.h
index 037f39f10aef..621d5b78977d 100644
--- a/clang/lib/Index/FileIndexRecord.h
+++ b/clang/lib/Index/FileIndexRecord.h
@@ -27,14 +27,13 @@ class FileIndexRecord {
 private:
   FileID FID;
   bool IsSystem;
-  std::vector<DeclOccurrence> Decls;
+  mutable bool IsSorted = false;
+  mutable std::vector<DeclOccurrence> Decls;
 
 public:
   FileIndexRecord(FileID FID, bool IsSystem) : FID(FID), IsSystem(IsSystem) {}
 
-  ArrayRef<DeclOccurrence> getDeclOccurrencesSortedByOffset() const {
-    return Decls;
-  }
+  ArrayRef<DeclOccurrence> getDeclOccurrencesSortedByOffset() const;
 
   FileID getFileID() const { return FID; }
   bool isSystem() const { return IsSystem; }


        


More information about the cfe-commits mailing list