[clang] a443b3d - [dataflow] add HTML logger: browse code/cfg/analysis timeline/state

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 19 06:37:12 PDT 2023


Author: Sam McCall
Date: 2023-04-19T15:37:06+02:00
New Revision: a443b3d18ef4d01e767994845b3f2819480a7b48

URL: https://github.com/llvm/llvm-project/commit/a443b3d18ef4d01e767994845b3f2819480a7b48
DIFF: https://github.com/llvm/llvm-project/commit/a443b3d18ef4d01e767994845b3f2819480a7b48.diff

LOG: [dataflow] add HTML logger: browse code/cfg/analysis timeline/state

With -dataflow-log=/dir we will write /dir/0.html etc for each
function analyzed.

These files show the function's code and CFG, and the path through
the CFG taken by the analysis. At each analysis point we can see the
lattice state.

Currently the lattice state dump is not terribly useful but we can
improve this: showing values associated with the current Expr,
simplifying flow condition, highlighting changes etc.

(Trying not to let this patch scope-creep too much, so I ripped out the
half-finished features)

Demo: https://htmlpreview.github.io/?https://gist.githubusercontent.com/sam-mccall/1746985bf13406bd19181af281aea9ff/raw/9718fdd48406dabccb3092acd983b4bd55da9dfa/analysis.html

Differential Revision: https://reviews.llvm.org/D146591

Added: 
    clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
    clang/lib/Analysis/FlowSensitive/HTMLLogger.css
    clang/lib/Analysis/FlowSensitive/HTMLLogger.html
    clang/lib/Analysis/FlowSensitive/HTMLLogger.js
    clang/utils/bundle_resources.py

Modified: 
    clang/include/clang/Analysis/FlowSensitive/Logger.h
    clang/lib/Analysis/FlowSensitive/CMakeLists.txt
    clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
    clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h
index 903dfbc30d40d..6836488003a97 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Logger.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h
@@ -31,6 +31,10 @@ class Logger {
   /// A logger that simply writes messages to the specified ostream in real
   /// time.
   static std::unique_ptr<Logger> textual(llvm::raw_ostream &);
+  /// A logger that builds an HTML UI to inspect the analysis results.
+  /// Each function's analysis is written to a stream obtained from the factory.
+  static std::unique_ptr<Logger>
+      html(std::function<std::unique_ptr<llvm::raw_ostream>()>);
 
   virtual ~Logger() = default;
 

diff  --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
index 86646662c4da9..aafbdfb2f6bdf 100644
--- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -3,6 +3,7 @@ add_clang_library(clangAnalysisFlowSensitive
   ControlFlowContext.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
+  HTMLLogger.cpp
   Logger.cpp
   Transfer.cpp
   TypeErasedDataflowAnalysis.cpp
@@ -17,3 +18,14 @@ add_clang_library(clangAnalysisFlowSensitive
   )
 
 add_subdirectory(Models)
+
+add_custom_command(OUTPUT HTMLLogger.inc
+  COMMAND "${Python3_EXECUTABLE}" ${CLANG_SOURCE_DIR}/utils/bundle_resources.py 
+  ${CMAKE_CURRENT_BINARY_DIR}/HTMLLogger.inc
+  HTMLLogger.html HTMLLogger.css HTMLLogger.js
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  COMMENT "Bundling HTMLLogger resources"
+  DEPENDS ${CLANG_SOURCE_DIR}/utils/bundle_resources.py HTMLLogger.html HTMLLogger.css HTMLLogger.js
+  VERBATIM)
+add_custom_target(clangAnalysisFlowSensitiveResources DEPENDS HTMLLogger.inc)
+add_dependencies(clangAnalysisFlowSensitive clangAnalysisFlowSensitiveResources)

diff  --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index ad57fd156f443..5dd390e962d82 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -20,14 +20,17 @@
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
 #include <cassert>
 #include <memory>
 #include <utility>
 
-static llvm::cl::opt<std::string>
-    DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
-                llvm::cl::desc("Emit log of dataflow analysis. With no arg, "
-                               "writes textual log to stderr."));
+static llvm::cl::opt<std::string> DataflowLog(
+    "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
+    llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
+                   "log to stderr. With an arg, writes HTML logs under the "
+                   "specified directory (one per analyzed function)."));
 
 namespace clang {
 namespace dataflow {
@@ -218,6 +221,34 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
   return nullptr;
 }
 
+// Builds the logger requested via -dataflow-log.
+// An empty flag value selects the textual logger on stderr; otherwise the
+// value names a directory which receives one HTML file per analysis.
+static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
+  if (DataflowLog.empty())
+    return Logger::textual(llvm::errs());
+
+  const std::string &LogDir = DataflowLog;
+  // Failing to create the directory is reported, but we carry on regardless.
+  if (std::error_code DirEC = llvm::sys::fs::create_directories(LogDir))
+    llvm::errs() << "Failed to create log dir: " << DirEC.message() << "\n";
+
+  // The counter is shared by every analysis run in the process, so that runs
+  // logging to the same directory don't all overwrite each other's 0.html.
+  // (The logger itself is not shared: it's not threadsafe.)
+  static std::atomic<unsigned> Counter = {0};
+  return Logger::html(
+      [OutDir = std::string(LogDir)]() -> std::unique_ptr<llvm::raw_ostream> {
+        const unsigned Index = Counter.fetch_add(1);
+        llvm::SmallString<256> Path(OutDir);
+        llvm::sys::path::append(Path, std::to_string(Index) + ".html");
+
+        std::error_code OpenEC;
+        auto FileOS = std::make_unique<llvm::raw_fd_ostream>(Path, OpenEC);
+        if (!OpenEC)
+          return FileOS;
+        // Couldn't open the file: report it and discard this analysis's log.
+        llvm::errs() << "Failed to create log " << Path << ": "
+                     << OpenEC.message() << "\n";
+        return std::make_unique<llvm::raw_null_ostream>();
+      });
+}
+
 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
                                                  Options Opts)
     : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
@@ -227,7 +258,7 @@ DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
   // based tools.
   if (Opts.Log == nullptr) {
     if (DataflowLog.getNumOccurrences() > 0) {
-      LogOwner = Logger::textual(llvm::errs());
+      LogOwner = makeLoggerFromCommandLine();
       this->Opts.Log = LogOwner.get();
       // FIXME: if the flag is given a value, write an HTML log to a file.
     } else {

diff  --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
new file mode 100644
index 0000000000000..cbf7a22bf7fa7
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -0,0 +1,451 @@
+//===-- HTMLLogger.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HTML logger. Given a directory dir/, we write
+// dir/0.html for the first analysis, etc.
+// These files contain a visualization that allows inspecting the CFG and the
+// state of the analysis at each point.
+// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
+// so each output file is self-contained.
+//
+// VIEWS
+//
+// The timeline and function view are always shown. These allow selecting basic
+// blocks, statements within them, and processing iterations (BBs are visited
+// multiple times when e.g. loops are involved).
+// These are written directly into the HTML body.
+//
+// There are also listings of particular basic blocks, and dumps of the state
+// at particular analysis points (i.e. BB2 iteration 3 statement 2).
+// These are only shown when the relevant BB/analysis point is *selected*.
+//
+// DATA AND TEMPLATES
+//
+// The HTML proper is mostly static.
+// The analysis data is in a JSON object HTMLLoggerData which is embedded as
+// a <script> in the <head>.
+// This gets rendered into DOM by a simple template processor which substitutes
+// the data into <template> tags embedded in the HTML. (see inflate() in JS).
+//
+// SELECTION
+//
+// This is the only real interactive mechanism.
+//
+// At any given time, there are several named selections, e.g.:
+//   bb: B2               (basic block 2 is selected)
+//   elt: B2.4            (statement 4 is selected)
+//   iter: B2:1           (iteration 1 of the basic block is selected)
+//   hover: B3            (hovering over basic block 3)
+//
+// The selection is updated by mouse events: hover by moving the mouse and
+// others by clicking. Elements that are click targets generally have attributes
+// (id or data-foo) that define what they should select.
+// See watchSelection() in JS for the exact logic.
+//
+// When the "bb" selection is set to "B2":
+//   - sections <section data-selection="bb"> get shown
+//   - templates under such sections get re-rendered
+//   - elements with class/id "B2" get class "bb-select"
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+// Defines assets: HTMLLogger_{html,js,css}
+#include "HTMLLogger.inc"
+
+namespace clang::dataflow {
+namespace {
+
+// Render a graphviz graph specification to SVG using the `dot` tool.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
+
+using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
+
+class HTMLLogger : public Logger {
+  StreamFactory Streams;
+  std::unique_ptr<llvm::raw_ostream> OS;
+  std::optional<llvm::json::OStream> JOS;
+
+  const ControlFlowContext *CFG;
+  // Timeline of iterations of CFG block visitation.
+  std::vector<std::pair<const CFGBlock *, unsigned>> Iters;
+  // Number of times each CFG block has been seen.
+  llvm::DenseMap<const CFGBlock *, unsigned> BlockIters;
+  // The messages logged in the current context but not yet written.
+  std::string ContextLogs;
+  // The number of elements we have visited within the current CFG block.
+  unsigned ElementIndex;
+
+public:
+  explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
+  // Starts logging a new analysis: obtains a fresh output stream and writes
+  // the document head, the code and CFG templates, and the opening of the
+  // JSON `states` object. That object is left open so that recordState() can
+  // add entries to it; endAnalysis() closes it.
+  void beginAnalysis(const ControlFlowContext &CFG,
+                     TypeErasedDataflowAnalysis &A) override {
+    OS = Streams();
+    this->CFG = &CFG;
+    // A logger may be used for several analyses in turn. Drop anything left
+    // over from the previous one so it doesn't leak into this output (the
+    // recorded CFGBlock pointers belong to the previous CFG).
+    Iters.clear();
+    BlockIters.clear();
+    ContextLogs.clear();
+    ElementIndex = 0;
+
+    *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
+
+    if (const auto *D = CFG.getDecl()) {
+      const auto &SM = A.getASTContext().getSourceManager();
+      // Names (e.g. operator<) and paths can contain HTML metacharacters.
+      *OS << "<title>";
+      if (const auto *ND = dyn_cast<NamedDecl>(D)) {
+        llvm::printHTMLEscaped(ND->getNameAsString(), *OS);
+        *OS << " at ";
+      }
+      llvm::printHTMLEscaped(SM.getFilename(D->getLocation()), *OS);
+      *OS << ":" << SM.getSpellingLineNumber(D->getLocation());
+      *OS << "</title>\n";
+    }
+
+    *OS << "<style>" << HTMLLogger_css << "</style>\n";
+    *OS << "<script>" << HTMLLogger_js << "</script>\n";
+
+    writeCode();
+    writeCFG();
+
+    *OS << "<script>var HTMLLoggerData = \n";
+    JOS.emplace(*OS, /*Indent=*/2);
+    JOS->objectBegin();
+    JOS->attributeBegin("states");
+    JOS->objectBegin();
+  }
+  // Between beginAnalysis() and endAnalysis() the states for particular
+  // analysis points are written into the `states` object.
+  // Here we close that object, add the `timeline` and `cfg` attributes, and
+  // write the remainder of the HTML document.
+  void endAnalysis() override {
+    // Close "states".
+    JOS->objectEnd();
+    JOS->attributeEnd();
+
+    JOS->attributeArray("timeline", [this] {
+      for (const auto &Visit : Iters)
+        JOS->object([this, &Visit] {
+          JOS->attribute("block", blockID(Visit.first->getBlockID()));
+          JOS->attribute("iter", Visit.second);
+        });
+    });
+    JOS->attributeObject("cfg", [this] {
+      for (const auto &BlockAndCount : BlockIters)
+        writeBlock(*BlockAndCount.first, BlockAndCount.second);
+    });
+
+    // Close the top-level object, then the <script> that contains it.
+    JOS->objectEnd();
+    JOS.reset();
+    *OS << ";\n</script>\n";
+    llvm::StringRef Template(HTMLLogger_html);
+    *OS << Template.split("<?INJECT?>").second;
+  }
+
+  // Records one more visit to block B on the timeline and restarts the
+  // per-block element count.
+  void enterBlock(const CFGBlock &B) override {
+    unsigned &Visits = BlockIters[&B];
+    ++Visits;
+    Iters.emplace_back(&B, Visits);
+    ElementIndex = 0;
+  }
+  // Advances to the next element of the current block.
+  void enterElement(const CFGElement &E) override {
+    ElementIndex += 1;
+  }
+
+  // Identifiers used in the JSON data and the generated markup:
+  //   "B<block>"                 a basic block
+  //   "B<block>.<element>"       an element within a block
+  //   "B<block>:<iter>"          one iteration of a block
+  //   "<iter ID>_<element ID>"   an element within one iteration
+  static std::string blockID(unsigned Block) {
+    return llvm::formatv("B{0}", Block).str();
+  }
+  static std::string eltID(unsigned Block, unsigned Element) {
+    return llvm::formatv("B{0}.{1}", Block, Element).str();
+  }
+  static std::string iterID(unsigned Block, unsigned Iter) {
+    return llvm::formatv("B{0}:{1}", Block, Iter).str();
+  }
+  static std::string elementIterID(unsigned Block, unsigned Iter,
+                                   unsigned Element) {
+    return iterID(Block, Iter) + "_" + eltID(Block, Element);
+  }
+
+  // Write the analysis state associated with a particular analysis point.
+  // The entry is keyed by elementIterID() for the current block visit and
+  // element index, and includes any log text buffered since the last entry.
+  // FIXME: this dump is fairly opaque. We should show:
+  //  - values associated with the current Stmt
+  //  - values associated with its children
+  //  - meaningful names for values
+  //  - which boolean values are implied true/false by the flow condition
+  void recordState(TypeErasedDataflowAnalysisState &State) override {
+    const auto &Visit = Iters.back();
+    const unsigned Block = Visit.first->getBlockID();
+    const unsigned Iter = Visit.second;
+    const std::string Key = elementIterID(Block, Iter, ElementIndex);
+
+    JOS->attributeObject(Key, [&] {
+      JOS->attribute("block", blockID(Block));
+      JOS->attribute("iter", Iter);
+      JOS->attribute("element", ElementIndex);
+      // Buffered logs are attached to this entry, then dropped.
+      if (!ContextLogs.empty()) {
+        JOS->attribute("logs", ContextLogs);
+        ContextLogs.clear();
+      }
+      std::string LatticeDump;
+      llvm::raw_string_ostream LatticeDumpS(LatticeDump);
+      State.Env.dump(LatticeDumpS);
+      JOS->attribute("builtinLattice", LatticeDump);
+    });
+  }
+  // Convergence is reported as an ordinary log line.
+  void blockConverged() override { logText("Block converged"); }
+
+  // Buffers a newline-terminated message; recordState() writes the buffer
+  // out with the next state entry.
+  void logText(llvm::StringRef S) override {
+    ContextLogs.append(S.data(), S.size());
+    ContextLogs += '\n';
+  }
+
+private:
+  // Write the CFG block details: an object keyed by the block's ID holding
+  // the number of iterations and a textual dump of each element.
+  // Currently this is just the list of elements in execution order.
+  // FIXME: an AST dump would be a useful view, too.
+  void writeBlock(const CFGBlock &B, unsigned Iters) {
+    auto WriteElements = [&] {
+      for (const auto &Elt : B.Elements) {
+        std::string Text;
+        llvm::raw_string_ostream TextS(Text);
+        Elt.dumpToStream(TextS);
+        JOS->value(Text);
+      }
+    };
+    JOS->attributeObject(blockID(B.getBlockID()), [&] {
+      JOS->attribute("iters", Iters);
+      JOS->attributeArray("elements", WriteElements);
+    });
+  }
+
+  // Write the code of function being examined.
+  // We want to overlay the code with <span>s that mark which BB particular
+  // tokens are associated with, and even which BB element (so that clicking
+  // can select the right element).
+  // Writes nothing if there is no decl or its source text can't be obtained.
+  void writeCode() {
+    if (!CFG->getDecl())
+      return;
+    const auto &AST = CFG->getDecl()->getASTContext();
+    bool Invalid = false;
+
+    // Extract the source code from the original file.
+    // Pretty-printing from the AST would probably be nicer (no macros or
+    // indentation to worry about), but we need the boundaries of particular
+    // AST nodes and the printer doesn't provide this.
+    auto Range = clang::Lexer::makeFileCharRange(
+        CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()),
+        AST.getSourceManager(), AST.getLangOpts());
+    if (Range.isInvalid())
+      return;
+    llvm::StringRef Code = clang::Lexer::getSourceText(
+        Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
+    if (Invalid)
+      return;
+
+    static constexpr unsigned Missing = -1;
+    // TokenInfo stores the BB and set of elements that a token is part of.
+    struct TokenInfo {
+      // The basic block this is part of.
+      // This is the BB of the stmt with the smallest containing range.
+      unsigned BB = Missing;
+      unsigned BBPriority = 0;
+      // The most specific stmt this is part of (smallest range).
+      unsigned Elt = Missing;
+      unsigned EltPriority = 0;
+      // All stmts this is part of.
+      SmallVector<unsigned> Elts;
+
+      // Mark this token as being part of BB.Elt.
+      // RangeLen is the character length of the element's range, used to
+      // distinguish inner vs outer statements.
+      // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
+      // However "a" has a smaller range, so is more specific. Clicking on the
+      // token "a" should select the stmt "a".
+      void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
+        // A worse BB (larger range) => ignore.
+        if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
+          return;
+        if (BB != this->BB) {
+          this->BB = BB;
+          Elts.clear();
+          BBPriority = RangeLen;
+          // Element indices are per-block: forget the old block's best one.
+          this->Elt = Missing;
+        }
+        BBPriority = std::min(BBPriority, RangeLen);
+        Elts.push_back(Elt);
+        if (this->Elt == Missing || EltPriority > RangeLen) {
+          this->Elt = Elt;
+          // Remember how specific this element is, so that only a strictly
+          // smaller range can replace it.
+          EltPriority = RangeLen;
+        }
+      }
+      bool operator==(const TokenInfo &Other) const {
+        return std::tie(BB, Elt, Elts) ==
+               std::tie(Other.BB, Other.Elt, Other.Elts);
+      }
+      // Write the attributes for the <span> on this token.
+      void write(llvm::raw_ostream &OS) const {
+        OS << "class='c";
+        if (BB != Missing)
+          OS << " " << blockID(BB);
+        for (unsigned Elt : Elts)
+          OS << " " << eltID(BB, Elt);
+        OS << "'";
+
+        if (Elt != Missing)
+          OS << " data-elt='" << eltID(BB, Elt) << "'";
+        if (BB != Missing)
+          OS << " data-bb='" << blockID(BB) << "'";
+      }
+    };
+
+    // Construct one TokenInfo per character in a flat array.
+    // This is inefficient (chars in a token all have the same info) but simple.
+    std::vector<TokenInfo> State(Code.size());
+    for (const auto *Block : CFG->getCFG()) {
+      unsigned EltIndex = 0;
+      for (const auto& Elt : *Block) {
+        ++EltIndex;
+        if (const auto S = Elt.getAs<CFGStmt>()) {
+          auto EltRange = clang::Lexer::makeFileCharRange(
+              CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
+              AST.getSourceManager(), AST.getLangOpts());
+          if (EltRange.isInvalid())
+            continue;
+          // Skip elements that aren't strictly inside the function's range,
+          // and inverted ranges (which would make Len wrap around below).
+          if (EltRange.getBegin() < Range.getBegin() ||
+              EltRange.getBegin() >= Range.getEnd() ||
+              EltRange.getEnd() < EltRange.getBegin() ||
+              EltRange.getEnd() >= Range.getEnd())
+            continue;
+          unsigned Off = EltRange.getBegin().getRawEncoding() -
+                         Range.getBegin().getRawEncoding();
+          unsigned Len = EltRange.getEnd().getRawEncoding() -
+                         EltRange.getBegin().getRawEncoding();
+          for (unsigned I = 0; I < Len; ++I)
+            State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
+        }
+      }
+    }
+
+    // Finally, write the code with the correct <span>s.
+    unsigned Line =
+        AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
+    *OS << "<template data-copy='code'>\n";
+    *OS << "<code class='filename'>";
+    llvm::printHTMLEscaped(
+        llvm::sys::path::filename(
+            AST.getSourceManager().getFilename(Range.getBegin())),
+        *OS);
+    *OS << "</code>";
+    *OS << "<code class='line' data-line='" << Line++ << "'>";
+    for (unsigned I = 0; I < Code.size(); ++I) {
+      // Don't actually write a <span> around each character, only break spans
+      // when the TokenInfo changes.
+      bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
+      bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
+      if (NeedOpen) {
+        *OS << "<span ";
+        State[I].write(*OS);
+        *OS << ">";
+      }
+      if (Code[I] == '\n')
+        *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
+      else
+        llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
+      if (NeedClose) *OS << "</span>";
+    }
+    *OS << "</code>\n";
+    *OS << "</template>";
+  }
+
+  // Write the CFG diagram, a graph of basic blocks.
+  // Laying out graphs is hard, so we construct a graphviz description and shell
+  // out to `dot` to turn it into an SVG.
+  // If rendering fails, the error text is written in place of the diagram.
+  void writeCFG() {
+    *OS << "<template data-copy='cfg'>\n";
+    llvm::Expected<std::string> SVG = renderSVG(buildCFGDot(CFG->getCFG()));
+    if (!SVG)
+      *OS << "Can't draw CFG: " << toString(SVG.takeError());
+    else
+      *OS << *SVG;
+    *OS << "</template>\n";
+  }
+
+  // Produce a graphviz description of a CFG: one node per block ID, and one
+  // edge per successor that has a reachable block.
+  static std::string buildCFGDot(const clang::CFG &CFG) {
+    std::string Graph;
+    llvm::raw_string_ostream GraphS(Graph);
+    // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
+    GraphS << R"(digraph {
+      tooltip=" "
+      node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
+      edge[tooltip = " "]
+)";
+    for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
+      GraphS << "  " << blockID(I) << " [id=" << blockID(I) << "]\n";
+    for (const auto *Block : CFG) {
+      for (const auto &Succ : Block->succs()) {
+        // A successor edge may have no reachable block (null); there is
+        // nothing to draw an edge to in that case.
+        const CFGBlock *Target = Succ.getReachableBlock();
+        if (!Target)
+          continue;
+        GraphS << "  " << blockID(Block->getBlockID()) << " -> "
+               << blockID(Target->getBlockID()) << "\n";
+      }
+    }
+    GraphS << "}\n";
+    return Graph;
+  }
+};
+
+// Nothing interesting here, just subprocess/temp-file plumbing.
+// Runs `dot -Tsvg` with the graph as stdin and a temp file as stdout, and
+// returns the output starting from its <svg> tag.
+// On failure, returns an error naming the step that failed. Messages don't
+// say what was being drawn; writeCFG() adds that context when reporting.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
+  auto Dot = llvm::sys::findProgramByName("dot");
+  if (!Dot)
+    return llvm::createStringError(Dot.getError(), "'dot' not found on PATH");
+
+  // Create input and output files for `dot` subprocess.
+  // (We create the output file as empty, to reserve the temp filename).
+  llvm::SmallString<256> Input, Output;
+  int InputFD;
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
+                                                   Input))
+    return llvm::createStringError(EC, "failed to create `dot` temp input");
+  // The temporary stream writes the graph and closes the descriptor at the
+  // end of this statement.
+  llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
+  auto DeleteInput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
+    return llvm::createStringError(EC, "failed to create `dot` temp output");
+  auto DeleteOutput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
+
+  // stdin and stdout are redirected to the temp files; stderr is not.
+  std::vector<std::optional<llvm::StringRef>> Redirects = {
+      Input, Output,
+      /*stderr=*/std::nullopt};
+  std::string ErrMsg;
+  int Code = llvm::sys::ExecuteAndWait(
+      *Dot, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
+      /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
+  if (!ErrMsg.empty())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed: " + ErrMsg);
+  if (Code != 0)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed (" + llvm::Twine(Code) + ")");
+
+  auto Buf = llvm::MemoryBuffer::getFile(Output);
+  if (!Buf)
+    return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
+
+  // Output has <?xml> prefix we don't want. Skip to <svg> tag.
+  llvm::StringRef Result = Buf.get()->getBuffer();
+  auto Pos = Result.find("<svg");
+  if (Pos == llvm::StringRef::npos)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Can't find <svg> tag in `dot` output");
+  return Result.substr(Pos).str();
+}
+
+} // namespace
+
+// Creates an HTML logger that obtains one output stream from `Streams` for
+// each analysis it logs.
+std::unique_ptr<Logger>
+Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
+  auto Impl = std::make_unique<HTMLLogger>(std::move(Streams));
+  return Impl;
+}
+
+} // namespace clang::dataflow

diff  --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.css b/clang/lib/Analysis/FlowSensitive/HTMLLogger.css
new file mode 100644
index 0000000000000..4877c1264c83b
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.css
@@ -0,0 +1,118 @@
+/*===-- HTMLLogger.css ----------------------------------------------------===
+*
+* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+* See https://llvm.org/LICENSE.txt for license information.
+* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+*
+*===----------------------------------------------------------------------===*/
+html { font-family: sans-serif; }
+body { margin: 0; display: flex; justify-content: left; }
+body > * { box-sizing: border-box; }
+body > section {
+  border: 1px solid black;
+  min-width: 20em;
+  overflow: auto;
+  max-height: 100vh;
+}
+section header {
+  background-color: #008;
+  color: white;
+  font-weight: bold;
+  font-size: large;
+}
+section h2 {
+  font-size: medium;
+  margin-bottom: 0.5em;
+  padding-top: 0.5em;
+  border-top: 1px solid #aaa;
+}
+#timeline {
+  min-width: 0;
+}
+#timeline .entry.hover {
+  background-color: #aaa;
+}
+#timeline .entry.iter-select {
+  background-color: #aac;
+}
+
+#bb-elements {
+  font-family: monospace;
+  font-size: x-small;
+  border-collapse: collapse;
+}
+/* First column of the element table. */
+#bb-elements td:nth-child(1) {
+  text-align: right;
+  width: 4em;
+  border-right: 1px solid #008;
+  padding: 0.3em 0.5em;
+
+  font-weight: bold;
+  color: #888;
+}
+#bb-elements tr.hover {
+  background-color: #abc;
+}
+#bb-elements tr.elt-select {
+  background-color: #acf;
+}
+#iterations {
+  display: flex;
+}
+#iterations .chooser {
+  flex-grow: 1;
+  text-align: center;
+}
+#iterations .chooser:not(.iter-select).hover {
+  background-color: #aaa;
+}
+#iterations .iter-select {
+  font-weight: bold;
+  background-color: #ccc;
+}
+#iterations .chooser:not(.iter-select) {
+  text-decoration: underline;
+  color: blue;
+}
+
+code.filename {
+  font-weight: bold;
+  color: black;
+  background-color: #ccc;
+  display: block;
+  text-align: center;
+}
+code.line {
+  display: block;
+  white-space: pre;
+}
+code.line:before { /* line numbers */
+  content: attr(data-line);
+  display: inline-block;
+  width: 2em;
+  text-align: right;
+  padding-right: 2px;
+  background-color: #ccc;
+  border-right: 1px solid #888;
+  margin-right: 8px;
+}
+code.line:has(.bb-select):before {
+  border-right: 4px solid black;
+  margin-right: 5px;
+}
+.c.hover, .bb.hover {
+  filter: saturate(200%) brightness(90%);
+}
+.c.elt-select {
+  box-shadow: inset 0 -4px 2px -2px #a00;
+}
+.bb.bb-select polygon {
+  stroke-width: 4px;
+  filter: brightness(70%) saturate(150%);
+}
+.bb { user-select: none; }
+.bb polygon { fill: white; }
+#cfg {
+  position: relative;
+  margin-left: 0.5em;
+}

diff  --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.html b/clang/lib/Analysis/FlowSensitive/HTMLLogger.html
new file mode 100644
index 0000000000000..c97f3ea8ac7d4
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.html
@@ -0,0 +1,73 @@
+<!doctype html>
+<html>
+<!-- HTMLLogger.html ---------------------------------------------------
+
+ Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ See https://llvm.org/LICENSE.txt for license information.
+ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===------------------------------------------------------------------------>
+
+<head>
+<?INJECT?>
+</head>
+
+<body>
+
+<section id="timeline" data-selection="">
+<header>Timeline</header>
+<template data-for="entry in timeline">
+  <div id="{{entry.block}}:{{entry.iter}}" data-bb="{{entry.block}}" class="entry">{{entry.block}} ({{entry.iter}})</div>
+</template>
+</section>
+
+<section id="function" data-selection="">
+<header>Function</header>
+<div id="code"></div>
+<div id="cfg"></div>
+</section>
+
+<section id="block" data-selection="bb">
+<header><template>Block {{selection.bb}}</template></header>
+<div id="iterations">
+  <template data-for="i in Array(cfg[selection.bb].iters).keys()">
+    <a class="chooser {{selection.bb}}:{{i+1}}" data-iter="{{selection.bb}}:{{i+1}}">Iteration {{i+1}}</a>
+  </template>
+</div>
+<table id="bb-elements">
+<template data-for="elt in cfg[selection.bb].elements">
+  <tr id="{{selection.bb}}.{{elt_index}}">
+    <td class="{{selection.bb}}">{{selection.bb}}.{{elt_index}}</td>
+    <td>{{elt}}</td>
+  </tr>
+</template>
+</table>
+</section>
+
+<section id="element" data-selection="iter,elt">
+<template data-let="state = states[selection.iter + '_' + selection.elt]">
+<header>
+  <template data-if="state.element == 0">{{state.block}} (iteration {{state.iter}}) initial state</template>
+  <template data-if="state.element != 0">Element {{selection.elt}} (iteration {{state.iter}})</template>
+</header>
+<template data-if="state.logs">
+  <h2>Logs</h2>
+  <pre>{{state.logs}}</pre>
+</template>
+<h2>Built-in lattice</h2>
+<pre>{{state.builtinLattice}}</pre>
+</template>
+</section>
+
+<script>
+addBBColors(Object.keys(HTMLLoggerData.cfg).length);
+watchSelection(HTMLLoggerData);
+updateSelection({}, HTMLLoggerData);
+// Copy code and cfg from <template>s into the body.
+// (`const` keeps the loop variable from becoming an implicit global.)
+for (const tmpl of document.querySelectorAll('template[data-copy]'))
+  document.getElementById(tmpl.dataset.copy).replaceChildren(
+      ...tmpl.content.cloneNode(/*deep=*/true).childNodes);
+</script>
+
+</body>
+</html>

diff  --git a/clang/lib/Analysis/FlowSensitive/HTMLLogger.js b/clang/lib/Analysis/FlowSensitive/HTMLLogger.js
new file mode 100644
index 0000000000000..5958418bc1e93
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/HTMLLogger.js
@@ -0,0 +1,216 @@
+//===-- HTMLLogger.js -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Based on selected objects, hide/show sections & populate data from templates.
+//
+// For example, if the selection is {bb="BB4", elt="BB4.6" iter="BB4:2"}:
+//   - show the "block" and "element" sections
+//   - re-render templates within these sections (if selection changed)
+//   - apply "bb-select" to items with class class "BB4", etc
+let selection = {};
+function updateSelection(changes, data) {
+  Object.assign(selection, changes);
+
+  data = Object.create(data);
+  data.selection = selection;
+  for (root of document.querySelectorAll('[data-selection]'))
+    updateSection(root, data);
+
+  for (var k in changes)
+    applyClassIf(k + '-select', classSelector(changes[k]));
+}
+
+// Given <section data-selection="x,y">:
+//  - hide section if selections x or y are null
+//  - re-render templates if x or y have changed
+function updateSection(root, data) {
+  let changed = root.selection == null;
+  root.selection ||= {};
+  for (key of root.dataset.selection.split(',')) {
+    if (!key) continue;
+    if (data.selection[key] != root.selection[key]) {
+      root.selection[key] = data.selection[key];
+      changed = true;
+    }
+    if (data.selection[key] == null) {
+      root.hidden = true;
+      return;
+    }
+  }
+  if (changed) {
+    root.hidden = false;
+    for (tmpl of root.getElementsByTagName('template'))
+      reinflate(tmpl, data);
+  }
+}
+
+// Expands template `tmpl` based on input `data`:
+//  - interpolates {{expressions}} in text and attributes
+//  - <template> tags can modify expansion: if, for etc
+// Outputs to `parent` element, inserting before `next`.
+function inflate(tmpl, data, parent, next) {
+  // We use eval() as our expression language in templates!
+  // The templates are static and trusted.
+  let evalExpr = (expr, data) => eval('with (data) { ' + expr + ' }');
+  let interpolate = (str, data) =>
+      str.replace(/\{\{(.*?)\}\}/g, (_, expr) => evalExpr(expr, data))
+  // Anything other than <template> tag: copy, interpolate, recursively inflate.
+  if (tmpl.nodeName != 'TEMPLATE') {
+    let clone = tmpl.cloneNode();
+    clone.inflated = true;
+    if (clone instanceof Text)
+      clone.textContent = interpolate(clone.textContent, data);
+    if (clone instanceof Element) {
+      for (attr of clone.attributes)
+        attr.value = interpolate(attr.value, data);
+      for (c of tmpl.childNodes)
+        inflate(c, data, clone, /*next=*/null);
+    }
+    return parent.insertBefore(clone, next);
+  }
+  // <template> tag handling. Base case: recursively inflate.
+  function handle(data) {
+    for (c of tmpl.content.childNodes)
+      inflate(c, data, parent, next);
+  }
+  // Directives on <template> tags modify behavior.
+  const directives = {
+    // data-for="x in expr": expr is enumerable, bind x to each in turn
+    'for': (nameInExpr, data, proceed) => {
+      let [name, expr] = nameInExpr.split(' in ');
+      let newData = Object.create(data);
+      let index = 0;
+      for (val of evalExpr(expr, data) || []) {
+        newData[name] = val;
+        newData[name + '_index'] = index++;
+        proceed(newData);
+      }
+    },
+    // data-if="expr": only include contents if expression is truthy
+    'if': (expr, data, proceed) => { if (evalExpr(expr, data)) proceed(data); },
+    // data-let="x = expr": bind x to value of expr
+    'let': (nameEqExpr, data, proceed) => {
+      let [name, expr] = nameEqExpr.split(' = ');
+      let newData = Object.create(data);
+      newData[name] = evalExpr(expr, data);
+      proceed(newData);
+    },
+  }
+  // Compose directive handlers on top of the base handler.
+  for (let [dir, value] of Object.entries(tmpl.dataset).reverse()) {
+    if (dir in directives) {
+      let proceed = handle;
+      handle = (data) => directives[dir](value, data, proceed);
+    }
+  }
+  handle(data);
+}
+// Expand a template, after first removing any prior expansion of it.
+function reinflate(tmpl, data) {
+  // Clear previously rendered template contents.
+  while (tmpl.nextSibling && tmpl.nextSibling.inflated)
+    tmpl.parentNode.removeChild(tmpl.nextSibling);
+  inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling);
+}
+
+// Handle a mouse event on a region containing selectable items.
+// This might end up changing the hover state or the selection state.
+//
+// targetSelector describes what target HTML element is selectable.
+// targetToID specifies how to determine the selection from it:
+//   hover: a function from target to the class name to highlight
+//   bb: a function from target to the basic-block name to select (BB4)
+//   elt: a function from target to the CFG element name to select (BB4.5)
+//   iter: a function from target to the BB iteration to select (BB4:2)
+// If an entry is missing, the selection is unmodified.
+// If an entry is null, the selection is always cleared.
+function mouseEventHandler(event, targetSelector, targetToID, data) {
+  var target = event.type == "mouseout" ? null : event.target.closest(targetSelector);
+  let selTarget = k => (target && targetToID[k]) ? targetToID[k](target) : null;
+  if (event.type == "click") {
+    let newSel = {};
+    for (var k in targetToID) {
+      if (k == 'hover') continue;
+      let t = selTarget(k);
+      newSel[k] = t;
+    }
+    updateSelection(newSel, data);
+  } else if ("hover" in targetToID) {
+    applyClassIf("hover", classSelector(selTarget("hover")));
+  }
+}
+function watch(rootSelector, targetSelector, targetToID, data) {
+  var root = document.querySelector(rootSelector);
+  for (event of ['mouseout', 'mousemove', 'click'])
+    root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID, data));
+}
+function watchSelection(data) {
+  let lastIter = (bb) => `${bb}:${data.cfg[bb].iters}`;
+  watch('#code', '.c', {
+    hover: e => e.dataset.elt,
+    bb: e => e.dataset.bb,
+    elt: e => e.dataset.elt,
+    // If we're already viewing an iteration of this BB, stick with the same.
+    iter: e => (selection.iter && selection.bb == e.dataset.bb) ? selection.iter : lastIter(e.dataset.bb),
+  }, data);
+  watch('#cfg', '.bb', {
+    hover: e => e.id,
+    bb: e => e.id,
+    elt: e => e.id + ".0",
+    iter: e => lastIter(e.id),
+  }, data);
+  watch('#timeline', '.entry', {
+    hover: e => [e.id, e.dataset.bb],
+    bb: e => e.dataset.bb,
+    elt: e => e.dataset.bb + ".0",
+    iter: e => e.id,
+  }, data);
+  watch('#bb-elements', 'tr', {
+    hover: e => e.id,
+    elt: e => e.id,
+  }, data);
+  watch('#iterations', '.chooser', {
+    hover: e => e.dataset.iter,
+    iter: e => e.dataset.iter,
+  }, data);
+  updateSelection({}, data);
+}
+function applyClassIf(cls, query) {
+  document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls));
+  document.querySelectorAll(query).forEach(elt => elt.classList.add(cls));
+}
+// Turns a class name into a CSS selector matching it, with some wrinkles:
+// - we treat id="foo" just like class="foo" to avoid repetition in the HTML
+// - cls can be an array of strings, we match them all
+function classSelector(cls) {
+  if (cls == null) return null;
+  if (Array.isArray(cls)) return cls.map(classSelector).join(', ');
+  var escaped = cls.replace('.', '\\.').replace(':', '\\:');
+  // don't require id="foo" class="foo"
+  return '.' + escaped + ", #" + escaped;
+}
+
+// Add a stylesheet defining colors for n basic blocks.
+function addBBColors(n) {
+  let sheet = new CSSStyleSheet();
+  // hex values to subtract from fff to get a base color
+  options = [0x001, 0x010, 0x011, 0x100, 0x101, 0x110, 0x111];
+  function color(hex) {
+    return "#" + hex.toString(16).padStart(3, "0");
+  }
+  function add(selector, property, hex) {
+    sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`)
+  }
+  for (var i = 0; i < n; ++i) {
+    let opt = options[i%options.length];
+    add(`.B${i}`, 'background-color', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'fill', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt);
+  }
+  document.adoptedStyleSheets.push(sheet);
+}

diff  --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
index eab37045c393e..0b8579a45b2df 100644
--- a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
@@ -9,6 +9,7 @@
 
 namespace clang::dataflow::test {
 namespace {
+using testing::HasSubstr;
 
 struct TestLattice {
   int Elements = 0;
@@ -83,19 +84,24 @@ class TestLogger : public Logger {
   void logText(llvm::StringRef Text) override { OS << Text << "\n"; }
 };
 
-TEST(LoggerTest, Sequence) {
+AnalysisInputs<TestAnalysis> makeInputs() {
   const char *Code = R"cpp(
 int target(bool b, int p, int q) {
   return b ? p : q;    
 }
 )cpp";
+  static const std::vector<std::string> Args = {
+      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
 
   auto Inputs = AnalysisInputs<TestAnalysis>(
       Code, ast_matchers::hasName("target"),
       [](ASTContext &C, Environment &) { return TestAnalysis(C); });
-  std::vector<std::string> Args = {
-      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
   Inputs.ASTBuildArgs = Args;
+  return Inputs;
+}
+
+TEST(LoggerTest, Sequence) {
+  auto Inputs = makeInputs();
   std::string Log;
   TestLogger Logger(Log);
   Inputs.BuiltinOptions.Log = &Logger;
@@ -148,5 +154,29 @@ endAnalysis()
 )");
 }
 
+TEST(LoggerTest, HTML) {
+  auto Inputs = makeInputs();
+  std::vector<std::string> Logs; // backing storage: one string per log
+  auto Logger = Logger::html([&]() { // callback supplying output streams
+    Logs.emplace_back(); // NOTE(review): can relocate earlier logs
+    return std::make_unique<llvm::raw_string_ostream>(Logs.back());
+  });
+  Inputs.BuiltinOptions.Log = Logger.get();
+
+  ASSERT_THAT_ERROR(checkDataflow<TestAnalysis>(std::move(Inputs),
+                                                [](const AnalysisOutputs &) {}),
+                    llvm::Succeeded());
+
+  // Simple smoke tests: we can't meaningfully test the behavior.
+  ASSERT_THAT(Logs, testing::SizeIs(1)); // the stream callback ran once
+  EXPECT_THAT(Logs[0], HasSubstr("function updateSelection")) << "embeds JS";
+  EXPECT_THAT(Logs[0], HasSubstr("html {")) << "embeds CSS";
+  EXPECT_THAT(Logs[0], HasSubstr("b (ImplicitCastExpr")) << "has CFG elements";
+  EXPECT_THAT(Logs[0], HasSubstr("\"B3:1_B3.1\":"))
+      << "has analysis point state";
+  EXPECT_THAT(Logs[0], HasSubstr("transferBranch(0)")) << "has analysis logs";
+  EXPECT_THAT(Logs[0], HasSubstr("LocToVal")) << "has built-in lattice dump";
+}
+
 } // namespace
 } // namespace clang::dataflow::test

diff  --git a/clang/utils/bundle_resources.py b/clang/utils/bundle_resources.py
new file mode 100644
index 0000000000000..692c0ba02b09b
--- /dev/null
+++ b/clang/utils/bundle_resources.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+
+#===- bundle_resources.py - Generate string constants with file contents. ===
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===
+
+# Usage: bundle-resources.py foo.inc a.js path/b.css ...
+# Produces foo.inc containing:
+#   const char a_js[] = "...";
+#   const char b_css[] = "...";
+import os
+import sys
+
+outfile = sys.argv[1]
+infiles = sys.argv[2:]
+
+with open(outfile, 'w') as out:
+  for filename in infiles:
+    varname = os.path.basename(filename).replace('.', '_')
+    out.write("const char " + varname + "[] = \n");
+    # MSVC limits each chunk of string to 2k, so split by lines.
+    # The overall limit is 64k, which ought to be enough for anyone.
+    for line in open(filename).read().split('\n'):
+      out.write('  R"x(' + line + ')x" "\\n"\n' )
+    out.write('  ;\n');


        


More information about the cfe-commits mailing list