[clang-tools-extra] r326569 - [clang] Fix use-after-free on code completion

Ilya Biryukov via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 2 04:28:27 PST 2018


Author: ibiryukov
Date: Fri Mar  2 04:28:27 2018
New Revision: 326569

URL: http://llvm.org/viewvc/llvm-project?rev=326569&view=rev
Log:
[clang] Fix use-after-free on code completion

Summary:
Found by asan. Fiddling with code completion AST after
FrontendAction::Exceute can lead to errors.
Calling the callback in ProcessCodeCompleteResults to make sure we
don't access uninitialized state.

This particular issue comes from the fact that Sema::TUScope is
deleted when destructor of ~Parser runs, but still present in
Sema::TUScope and accessed when building completion items.

I'm still struggling to come up with a small repro. The relevant
stackframes reported by asan are:
ERROR: AddressSanitizer: heap-use-after-free on address
READ of size 8 at 0x61400020d090 thread T175
    #0 0x5632dff7821b in llvm::SmallPtrSetImplBase::isSmall() const include/llvm/ADT/SmallPtrSet.h:195:33
    #1 0x5632e0335901 in llvm::SmallPtrSetImplBase::insert_imp(void const*) include/llvm/ADT/SmallPtrSet.h:127:9
    #2 0x5632e067347d in llvm::SmallPtrSetImpl<clang::Decl*>::insert(clang::Decl*) include/llvm/ADT/SmallPtrSet.h:372:14
    #3 0x5632e065df80 in clang::Scope::AddDecl(clang::Decl*) tools/clang/include/clang/Sema/Scope.h:287:18
    #4 0x5632e0623eea in clang::ASTReader::pushExternalDeclIntoScope(clang::NamedDecl*, clang::DeclarationName) clang/lib/Serialization/ASTReader.cpp
    #5 0x5632e062ce74 in clang::ASTReader::finishPendingActions() tools/clang/lib/Serialization/ASTReader.cpp:9164:9
    ....
    #30 0x5632e02009c4 in clang::index::generateUSRForDecl(clang::Decl const*, llvm::SmallVectorImpl<char>&) tools/clang/lib/Index/USRGeneration.cpp:1037:6
    #31 0x5632dff73eab in clang::clangd::(anonymous namespace)::getSymbolID(clang::CodeCompletionResult const&) tools/clang/tools/extra/clangd/CodeComplete.cpp:326:20
    #32 0x5632dff6fe91 in clang::clangd::CodeCompleteFlow::mergeResults(std::vector<clang::CodeCompletionResult, std::allocator<clang::CodeCompletionResult> > const&, clang::clangd::SymbolSlab const&)::'lambda'(clang::CodeCompletionResult const&)::operator()(clang::CodeCompletionResult const&) tools/clang/tools/extra/clangd/CodeComplete.cpp:938:24
    #33 0x5632dff6e426 in clang::clangd::CodeCompleteFlow::mergeResults(std::vector<clang::CodeCompletionResult, std::allocator<clang::CodeCompletionResult> > const&, clang::clangd::SymbolSlab const&) third_party/llvm/llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:949:38
    #34 0x5632dff7a34d in clang::clangd::CodeCompleteFlow::runWithSema() llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:894:16
    #35 0x5632dff6df6a in clang::clangd::CodeCompleteFlow::run(clang::clangd::(anonymous namespace)::SemaCompleteInput const&) &&::'lambda'()::operator()() const third_party/llvm/llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:858:35
    #36 0x5632dff6cd42 in clang::clangd::(anonymous namespace)::semaCodeComplete(std::unique_ptr<clang::CodeCompleteConsumer, std::default_delete<clang::CodeCompleteConsumer> >, clang::CodeCompleteOptions const&, clang::clangd::(anonymous namespace)::SemaCompleteInput const&, llvm::function_ref<void ()>) tools/clang/tools/extra/clangd/CodeComplete.cpp:735:5

0x61400020d090 is located 80 bytes inside of 432-byte region [0x61400020d040,0x61400020d1f0)
freed by thread T175  here:
    #0 0x5632df74e115 in operator delete(void*, unsigned long) projects/compiler-rt/lib/asan/asan_new_delete.cc:161:3
    #1 0x5632e0b06973 in clang::Parser::~Parser() tools/clang/lib/Parse/Parser.cpp:410:3
    #2 0x5632e0b06ddd in clang::Parser::~Parser() clang/lib/Parse/Parser.cpp:408:19
    #3 0x5632e0b03286 in std::unique_ptr<clang::Parser, std::default_delete<clang::Parser> >::~unique_ptr() .../bits/unique_ptr.h:236:4
    #4 0x5632e0b021c4 in clang::ParseAST(clang::Sema&, bool, bool) tools/clang/lib/Parse/ParseAST.cpp:182:1
    #5 0x5632e0726544 in clang::FrontendAction::Execute() tools/clang/lib/Frontend/FrontendAction.cpp:904:8
    #6 0x5632dff6cd05 in clang::clangd::(anonymous namespace)::semaCodeComplete(std::unique_ptr<clang::CodeCompleteConsumer, std::default_delete<clang::CodeCompleteConsumer> >, clang::CodeCompleteOptions const&, clang::clangd::(anonymous namespace)::SemaCompleteInput const&, llvm::function_ref<void ()>) tools/clang/tools/extra/clangd/CodeComplete.cpp:728:15

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: klimek, jkorous-apple, cfe-commits, ioeric

Differential Revision: https://reviews.llvm.org/D44000

Modified:
    clang-tools-extra/trunk/clangd/CodeComplete.cpp

Modified: clang-tools-extra/trunk/clangd/CodeComplete.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CodeComplete.cpp?rev=326569&r1=326568&r2=326569&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CodeComplete.cpp (original)
+++ clang-tools-extra/trunk/clangd/CodeComplete.cpp Fri Mar  2 04:28:27 2018
@@ -429,13 +429,19 @@ std::vector<std::string> getQueryScopes(
 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
 // merge with index results first.
+// Generally the fields and methods of this object should only be used from
+// within the callback.
 struct CompletionRecorder : public CodeCompleteConsumer {
-  CompletionRecorder(const CodeCompleteOptions &Opts)
+  CompletionRecorder(const CodeCompleteOptions &Opts,
+                     UniqueFunction<void()> ResultsCallback)
       : CodeCompleteConsumer(Opts.getClangCompleteOpts(),
                              /*OutputIsBinary=*/false),
         CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
-        CCTUInfo(CCAllocator) {}
+        CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {
+    assert(this->ResultsCallback);
+  }
+
   std::vector<CodeCompletionResult> Results;
   CodeCompletionContext CCContext;
   Sema *CCSema = nullptr; // Sema that created the results.
@@ -466,6 +472,7 @@ struct CompletionRecorder : public CodeC
         continue;
       Results.push_back(Result);
     }
+    ResultsCallback();
   }
 
   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
@@ -503,6 +510,7 @@ private:
   CodeCompleteOptions Opts;
   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
   CodeCompletionTUInfo CCTUInfo;
+  UniqueFunction<void()> ResultsCallback;
 };
 
 // Tracks a bounded number of candidates with the best scores.
@@ -657,12 +665,10 @@ struct SemaCompleteInput {
 };
 
 // Invokes Sema code completion on a file.
-// Callback will be invoked once completion is done, but before cleaning up.
 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
                       const clang::CodeCompleteOptions &Options,
-                      const SemaCompleteInput &Input,
-                      llvm::function_ref<void()> Callback = nullptr) {
-  auto Tracer = llvm::make_unique<trace::Span>("Sema completion");
+                      const SemaCompleteInput &Input) {
+  trace::Span Tracer("Sema completion");
   std::vector<const char *> ArgStrs;
   for (const auto &S : Input.Command.CommandLine)
     ArgStrs.push_back(S.c_str());
@@ -729,12 +735,6 @@ bool semaCodeComplete(std::unique_ptr<Co
     log("Execute() failed when running codeComplete for " + Input.FileName);
     return false;
   }
-  Tracer.reset();
-
-  if (Callback)
-    Callback();
-
-  Tracer = llvm::make_unique<trace::Span>("Sema completion cleanup");
   Action.EndSourceFile();
 
   return true;
@@ -813,7 +813,7 @@ clang::CodeCompleteOptions CodeCompleteO
 //     other arenas, which must stay alive for us to build CompletionItems.
 //   - we may get duplicate results from Sema and the Index, we need to merge.
 //
-// So we start Sema completion first, but defer its cleanup until we're done.
+// So we start Sema completion first, and do all our work in its callback.
 // We use the Sema context information to query the index.
 // Then we merge the two result sets, producing items that are Sema/Index/Both.
 // These items are scored, and the top N are synthesized into the LSP response.
@@ -834,8 +834,7 @@ class CodeCompleteFlow {
   PathRef FileName;
   const CodeCompleteOptions &Opts;
   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
-  std::unique_ptr<CompletionRecorder> RecorderOwner;
-  CompletionRecorder &Recorder;
+  CompletionRecorder *Recorder = nullptr;
   int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
   bool Incomplete = false; // Would more be available with a higher limit?
   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
@@ -843,25 +842,24 @@ class CodeCompleteFlow {
 public:
   // A CodeCompleteFlow object is only useful for calling run() exactly once.
   CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
-      : FileName(FileName), Opts(Opts),
-        RecorderOwner(new CompletionRecorder(Opts)), Recorder(*RecorderOwner) {}
+      : FileName(FileName), Opts(Opts) {}
 
   CompletionList run(const SemaCompleteInput &SemaCCInput) && {
     trace::Span Tracer("CodeCompleteFlow");
     // We run Sema code completion first. It builds an AST and calculates:
-    //   - completion results based on the AST. These are saved for merging.
+    //   - completion results based on the AST.
     //   - partial identifier and context. We need these for the index query.
     CompletionList Output;
+    auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
+      assert(Recorder && "Recorder is not set");
+      Output = runWithSema();
+      SPAN_ATTACH(Tracer, "sema_completion_kind",
+                  getCompletionKindString(Recorder->CCContext.getKind()));
+    });
+
+    Recorder = RecorderOwner.get();
     semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
-                     SemaCCInput, [&] {
-                       if (Recorder.CCSema) {
-                         Output = runWithSema();
-                         SPAN_ATTACH(
-                             Tracer, "sema_completion_kind",
-                             getCompletionKindString(Recorder.CCContext.getKind()));
-                       } else
-                         log("Code complete: no Sema callback, 0 results");
-                     });
+                     SemaCCInput);
 
     SPAN_ATTACH(Tracer, "sema_results", NSema);
     SPAN_ATTACH(Tracer, "index_results", NIndex);
@@ -883,7 +881,7 @@ private:
   // Sema data structures are torn down. It does all the real work.
   CompletionList runWithSema() {
     Filter = FuzzyMatcher(
-        Recorder.CCSema->getPreprocessor().getCodeCompletionFilter());
+        Recorder->CCSema->getPreprocessor().getCodeCompletionFilter());
     // Sema provides the needed context to query the index.
     // FIXME: in addition to querying for extra/overlapping symbols, we should
     //        explicitly request symbols corresponding to Sema results.
@@ -891,7 +889,7 @@ private:
     // We must copy index results to preserve them, but there are at most Limit.
     auto IndexResults = queryIndex();
     // Merge Sema and Index results, score them, and pick the winners.
-    auto Top = mergeResults(Recorder.Results, IndexResults);
+    auto Top = mergeResults(Recorder->Results, IndexResults);
     // Convert the results to the desired LSP structs.
     CompletionList Output;
     for (auto &C : Top)
@@ -901,7 +899,7 @@ private:
   }
 
   SymbolSlab queryIndex() {
-    if (!Opts.Index || !allowIndex(Recorder.CCContext.getKind()))
+    if (!Opts.Index || !allowIndex(Recorder->CCContext.getKind()))
       return SymbolSlab();
     trace::Span Tracer("Query index");
     SPAN_ATTACH(Tracer, "limit", Opts.Limit);
@@ -912,8 +910,8 @@ private:
     if (Opts.Limit)
       Req.MaxCandidateCount = Opts.Limit;
     Req.Query = Filter->pattern();
-    Req.Scopes =
-        getQueryScopes(Recorder.CCContext, Recorder.CCSema->getSourceManager());
+    Req.Scopes = getQueryScopes(Recorder->CCContext,
+                                Recorder->CCSema->getSourceManager());
     log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
                       Req.Query,
                       llvm::join(Req.Scopes.begin(), Req.Scopes.end(), ",")));
@@ -945,7 +943,7 @@ private:
       return nullptr;
     };
     // Emit all Sema results, merging them with Index results if possible.
-    for (auto &SemaResult : Recorder.Results)
+    for (auto &SemaResult : Recorder->Results)
       addCandidate(Top, &SemaResult, CorrespondingIndexResult(SemaResult));
     // Now emit any Index-only results.
     for (const auto &IndexResult : IndexResults) {
@@ -962,7 +960,7 @@ private:
     CompletionCandidate C;
     C.SemaResult = SemaResult;
     C.IndexResult = IndexResult;
-    C.Name = IndexResult ? IndexResult->Name : Recorder.getName(*SemaResult);
+    C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
 
     CompletionItemScores Scores;
     if (auto FuzzyScore = Filter->match(C.Name))
@@ -986,7 +984,7 @@ private:
                                   const CompletionItemScores &Scores) {
     CodeCompletionString *SemaCCS = nullptr;
     if (auto *SR = Candidate.SemaResult)
-      SemaCCS = Recorder.codeCompletionString(*SR, Opts.IncludeBriefComments);
+      SemaCCS = Recorder->codeCompletionString(*SR, Opts.IncludeBriefComments);
     return Candidate.build(FileName, Scores, Opts, SemaCCS);
   }
 };




More information about the cfe-commits mailing list