[clang] Implement `-dump-minimization-hints` flag. (PR #133910)
Viktoriia Bakalova via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 9 08:53:22 PDT 2025
================
@@ -49,6 +54,185 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
namespace {
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
+/// to aid debugging and bug minimization. It implements ASTConsumer and
+/// ASTDeserializationListener, so that an object of
+/// DeserializedDeclsLineRangePrinter registers as its own listener. The
+/// ASTDeserializationListener interface provides the DeclRead callback that we
+/// use to collect the deserialized Decls. Note that printing or otherwise
+/// processing them at this point is dangerous, since that could trigger
+/// additional deserialization and crash compilation. Therefore, we process the
+/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
+/// safe point, since we know that by this point all the Decls needed by the
+/// compiler frontend have been deserialized. In case our processing causes
+/// further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTConsumer,
+ ASTDeserializationListener {
+public:
+  /// Builds a printer that resolves locations through \p SM and takes
+  /// ownership of \p OS, the stream the collected ranges are written to.
+  explicit DeserializedDeclsLineRangePrinter(
+      SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+      : SM(SM), OS(std::move(OS)) {}
+
+  /// This object acts as its own deserialization listener, so hand back a
+  /// pointer to its ASTDeserializationListener base.
+  ASTDeserializationListener *GetASTDeserializationListener() override {
+    ASTDeserializationListener *Self = this;
+    return Self;
+  }
+
+  /// Listener callback: records \p D for later processing in
+  /// HandleTranslationUnit. Nothing is recorded once collection has stopped.
+  void DeclRead(GlobalDeclID ID, const Decl *D) override {
+    if (!IsCollectingDecls || !D)
+      return;
+
+    // These decls cover a lot of nested declarations that might not be used,
+    // reducing the granularity and making the output less useful.
+    if (isa<TranslationUnitDecl, LinkageSpecDecl, NamespaceDecl>(D))
+      return;
+
+    // We choose to work at namespace level to reduce complexity and the
+    // number of cases we care about, so only decls whose context is a file
+    // context are kept.
+    const auto *Parent = D->getDeclContext();
+    const bool IsFileLevel = Parent && Parent->isFileContext();
+    if (!IsFileLevel)
+      return;
+
+    PendingDecls.push_back(D);
+  }
+
+  /// A (line, column) pair obtained from spelling locations, ordered by line
+  /// and then by column.
+  struct Position {
+    unsigned Line;
+    unsigned Column;
+
+    /// Lexicographic comparison: line first, column as the tie-breaker.
+    bool operator<(const Position &other) const {
+      if (Line != other.Line)
+        return Line < other.Line;
+      return Column < other.Column;
+    }
+
+    /// Returns the spelling line/column of the beginning of \p R.
+    static Position GetBeginSpelling(const SourceManager &SM,
+                                     const CharSourceRange &R) {
+      SourceLocation Begin = R.getBegin();
+      return {SM.getSpellingLineNumber(Begin),
+              SM.getSpellingColumnNumber(Begin)};
+    }
+
+    /// Returns the spelling line/column of the (exclusive) end of \p Range,
+    /// extended over a directly following semicolon if there is one.
+    static Position GetEndSpelling(const SourceManager &SM,
+                                   const CharSourceRange &Range,
+                                   const LangOptions &LangOpts) {
+      // For token ranges, compute end location for end character of the range.
+      // The end location of returned range is exclusive.
+      CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts);
+      SourceLocation End = R.getEnd();
+      // Relex the token past the end location of the last token in the source
+      // range. If it's a semicolon, extend the end to just past it.
+      Token PossiblySemi;
+      // getRawToken reports failure by returning true; in that case the token
+      // must not be inspected.
+      const bool LexFailed = Lexer::getRawToken(
+          End, PossiblySemi, SM, LangOpts, /*IgnoreWhiteSpace=*/true);
+      // Whitespace before the token is skipped by the lexer, so the new end is
+      // computed from the semicolon's own location rather than from the old
+      // end; otherwise `int x ;` would not include the `;`.
+      if (!LexFailed && PossiblySemi.is(tok::semi))
+        End = PossiblySemi.getLocation().getLocWithOffset(1);
+      return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
+    }
+  };
+
+ struct RequiredRanges { // Merged ranges collected for a single file.
+ StringRef Filename; // Name of the file, taken from its file entry reference.
+ std::vector<std::pair<Position, Position>> FromTo; // (begin, end) positions.
+ };
+  /// Stops collecting decls, converts the collected decls into per-file
+  /// (begin, end) ranges, merges touching/overlapping ranges, and prints the
+  /// result as JSON. Must be called at most once.
+  ///
+  /// \param Context unused.
+  void HandleTranslationUnit(ASTContext &Context) override {
+    assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
+    // From here on DeclRead ignores further decls, so PendingDecls is not
+    // modified while it is being iterated below.
+    IsCollectingDecls = false;
+
+    // Group the ranges of the collected decls by the file they belong to.
+    struct FileData {
+      std::vector<std::pair<Position, Position>> FromTo;
+      OptionalFileEntryRef Ref;
+    };
+    llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+    for (const Decl *D : PendingDecls) {
+      CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+      if (!R.isValid())
+        continue;
+
+      const FileID BeginFID = SM.getFileID(R.getBegin());
+      auto *F = SM.getFileEntryForID(BeginFID);
+      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+        // Such cases are rare and difficult to handle.
+        continue;
+      }
+
+      auto &Data = FileToLines[F];
+      if (!Data.Ref)
+        Data.Ref = SM.getFileEntryRefForID(BeginFID);
+      Data.FromTo.push_back(
+          {Position::GetBeginSpelling(SM, R),
+           Position::GetEndSpelling(SM, R, D->getLangOpts())});
+    }
+
+    // To simplify output, merge consecutive and intersecting ranges.
+    std::vector<RequiredRanges> Result;
+    Result.reserve(FileToLines.size());
+    for (auto &[F, Data] : FileToLines) {
+      auto &FromTo = Data.FromTo;
+      assert(!FromTo.empty());
+
+      // Without a file entry reference there is no name to report.
+      if (!Data.Ref)
+        continue;
+
+      llvm::sort(FromTo);
+
+      std::vector<std::pair<Position, Position>> MergedRanges;
+      MergedRanges.push_back(FromTo.front());
+      for (auto It = FromTo.begin() + 1; It != FromTo.end(); ++It) {
+        auto &Last = MergedRanges.back();
+        if (Last.second < It->first) {
+          // Strictly after the last merged range: start a new one.
+          MergedRanges.push_back(*It);
+          continue;
+        }
+        // Touching or overlapping: grow the last merged range if needed.
+        if (Last.second < It->second)
+          Last.second = It->second;
+      }
+      Result.push_back({Data.Ref->getName(), std::move(MergedRanges)});
+    }
+
+    // The map above is keyed by pointer, so its iteration order can differ
+    // between runs. Order the files by name to keep the output reproducible.
+    llvm::sort(Result,
+               [](const RequiredRanges &LHS, const RequiredRanges &RHS) {
+                 return LHS.Filename < RHS.Filename;
+               });
+    printJson(Result);
+  }
+
+private:
+ std::vector<const Decl *> PendingDecls;
+ bool IsCollectingDecls = true;
+ const SourceManager &SM;
+ std::unique_ptr<llvm::raw_ostream> OS;
+
+ void printJson(llvm::ArrayRef<RequiredRanges> Result) {
+ *OS << "{\n";
----------------
VitaNuo wrote:
I've run some tests, and the maximum I'm getting is 4.4MB of JSON for a very large compilation. I think this should be palatable. For a smaller compilation, it's often <1MB.
https://github.com/llvm/llvm-project/pull/133910
More information about the cfe-commits
mailing list