[llvm] r359186 - [Support] Add JSON streaming output API, faster where the heavy value types aren't needed.

Sam McCall via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 25 05:51:42 PDT 2019


Author: sammccall
Date: Thu Apr 25 05:51:42 2019
New Revision: 359186

URL: http://llvm.org/viewvc/llvm-project?rev=359186&view=rev
Log:
[Support] Add JSON streaming output API, faster where the heavy value types aren't needed.

Summary:
There's still a little bit of constant factor that could be trimmed (e.g.
more overloads to avoid round-tripping primitives through json::Value).
But this solves the memory scaling problem, and greatly improves the performance
constant factor, and the API should leave room for optimization if needed.

Adapt TimeProfiler to use it, eliminating almost all the performance regression
from r358476.

Performance test on my machine:
perf stat -r 5 ~/llvmbuild-opt/bin/clang++ -w -S -ftime-trace -mllvm -time-trace-granularity=0 spirit.cpp

Handcrafted JSON (HEAD=r358532 with r358476 reverted): 2480ms
json::Value (HEAD): 2757ms (+11%)
After this patch: 2520 ms (+1.6%)

Reviewers: anton-afanasyev, lebedev.ri

Subscribers: kristina, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60804

Modified:
    llvm/trunk/include/llvm/Support/JSON.h
    llvm/trunk/lib/Support/JSON.cpp
    llvm/trunk/lib/Support/TimeProfiler.cpp
    llvm/trunk/unittests/Support/JSONTest.cpp

Modified: llvm/trunk/include/llvm/Support/JSON.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/JSON.h?rev=359186&r1=359185&r2=359186&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/JSON.h (original)
+++ llvm/trunk/include/llvm/Support/JSON.h Thu Apr 25 05:51:42 2019
@@ -21,6 +21,9 @@
 /// - a convention and helpers for mapping between json::Value and user-defined
 ///   types. See fromJSON(), ObjectMapper, and the class comment on Value.
 ///
+/// - an output API json::OStream which can emit JSON without materializing
+///   all structures as json::Value.
+///
 /// Typically, JSON data would be read from an external source, parsed into
 /// a Value, and then converted into some native data structure before doing
 /// real work on it. (And vice versa when writing).
@@ -437,11 +440,6 @@ public:
     return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr;
   }
 
-  /// Serializes this Value to JSON, writing it to the provided stream.
-  /// The formatting is compact (no extra whitespace) and deterministic.
-  /// For pretty-printing, use the formatv() format_provider below.
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
-
 private:
   void destroy();
   void copyFrom(const Value &M);
@@ -462,9 +460,7 @@ private:
     return *static_cast<T *>(Storage);
   }
 
-  template <typename Indenter>
-  void print(llvm::raw_ostream &, const Indenter &) const;
-  friend struct llvm::format_provider<llvm::json::Value>;
+  friend class OStream;
 
   enum ValueType : char {
     T_Null,
@@ -486,7 +482,6 @@ private:
 
 bool operator==(const Value &, const Value &);
 inline bool operator!=(const Value &L, const Value &R) { return !(L == R); }
-llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Value &);
 
 /// ObjectKey is a used to capture keys in Object. Like Value but:
 ///   - only strings are allowed
@@ -699,6 +694,152 @@ public:
     return llvm::inconvertibleErrorCode();
   }
 };
+
+/// json::OStream allows writing well-formed JSON without materializing
+/// all structures as json::Value ahead of time.
+/// It's faster, lower-level, and less safe than OS << json::Value.
+///
+/// Only one "top-level" object can be written to a stream.
+/// Simplest usage involves passing lambdas (Blocks) to fill in containers:
+///
+///   json::OStream J(OS);
+///   J.array([&]{
+///     for (const Event &E : Events)
+///       J.object([&] {
+///         J.attribute("timestamp", int64_t(E.Time));
+///         J.attributeArray("participants", [&] {
+///           for (const Participant &P : E.Participants)
+///             J.value(P.toString());
+///         });
+///       });
+///   });
+///
+/// This would produce JSON like:
+///
+///   [
+///     {
+///       "timestamp": 19287398741,
+///       "participants": [
+///         "King Kong",
+///         "Miley Cyrus",
+///         "Cleopatra"
+///       ]
+///     },
+///     ...
+///   ]
+///
+/// The lower level begin/end methods (arrayBegin()) are more flexible but
+/// care must be taken to pair them correctly:
+///
+///   json::OStream J(OS);
+///   J.arrayBegin();
+///   for (const Event &E : Events) {
+///     J.objectBegin();
+///     J.attribute("timestamp", int64_t(E.Time));
+///     J.attributeBegin("participants"); J.arrayBegin();
+///     for (const Participant &P : E.Participants)
+///       J.value(P.toString());
+///     J.arrayEnd(); J.attributeEnd();
+///     J.objectEnd();
+///   }
+///   J.arrayEnd();
+///
+/// If the call sequence isn't valid JSON, asserts will fire in debug mode.
+/// This can be mismatched begin()/end() pairs, trying to emit attributes inside
+/// an array, and so on.
+/// With asserts disabled, this is undefined behavior.
+class OStream {
+ public:
+  using Block = llvm::function_ref<void()>;
+  // OStream does not buffer internally, and need never be flushed or destroyed.
+  // If IndentSize is nonzero, output is pretty-printed.
+  explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0)
+      : OS(OS), IndentSize(IndentSize) {
+    Stack.emplace_back();
+  }
+  ~OStream() {
+    assert(Stack.size() == 1 && "Unmatched begin()/end()");
+    assert(Stack.back().Ctx == Singleton);
+    assert(Stack.back().HasValue && "Did not write top-level value");
+  }
+
+  // High level functions to output a value.
+  // Valid at top-level (exactly once), in an attribute value (exactly once),
+  // or in an array (any number of times).
+
+  /// Emit a self-contained value (number, string, vector<string> etc).
+  void value(const Value &V);
+  /// Emit an array whose elements are emitted in the provided Block.
+  void array(Block Contents) {
+    arrayBegin();
+    Contents();
+    arrayEnd();
+  }
+  /// Emit an object whose elements are emitted in the provided Block.
+  void object(Block Contents) {
+    objectBegin();
+    Contents();
+    objectEnd();
+  }
+
+  // High level functions to output object attributes.
+  // Valid only within an object (any number of times).
+
+  /// Emit an attribute whose value is self-contained (number, vector<int> etc).
+  void attribute(llvm::StringRef Key, const Value& Contents) {
+    attributeImpl(Key, [&] { value(Contents); });
+  }
+  /// Emit an attribute whose value is an array with elements from the Block.
+  void attributeArray(llvm::StringRef Key, Block Contents) {
+    attributeImpl(Key, [&] { array(Contents); });
+  }
+  /// Emit an attribute whose value is an object with attributes from the Block.
+  void attributeObject(llvm::StringRef Key, Block Contents) {
+    attributeImpl(Key, [&] { object(Contents); });
+  }
+
+  // Low-level begin/end functions to output arrays, objects, and attributes.
+  // Must be correctly paired. Allowed contexts are as above.
+
+  void arrayBegin();
+  void arrayEnd();
+  void objectBegin();
+  void objectEnd();
+  void attributeBegin(llvm::StringRef Key);
+  void attributeEnd();
+
+ private:
+  void attributeImpl(llvm::StringRef Key, Block Contents) {
+    attributeBegin(Key);
+    Contents();
+    attributeEnd();
+  }
+
+  void valueBegin();
+  void newline();
+
+  enum Context {
+    Singleton, // Top level, or object attribute.
+    Array,
+    Object,
+  };
+  struct State {
+    Context Ctx = Singleton;
+    bool HasValue = false;
+  };
+  llvm::SmallVector<State, 16> Stack; // Never empty.
+  llvm::raw_ostream &OS;
+  unsigned IndentSize;
+  unsigned Indent = 0;
+};
+
+/// Serializes this Value to JSON, writing it to the provided stream.
+/// The formatting is compact (no extra whitespace) and deterministic.
+/// For pretty-printing, use the formatv() format_provider below.
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) {
+  OStream(OS).value(V);
+  return OS;
+}
 } // namespace json
 
 /// Allow printing json::Value with formatv().

Modified: llvm/trunk/lib/Support/JSON.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/JSON.cpp?rev=359186&r1=359185&r2=359186&view=diff
==============================================================================
--- llvm/trunk/lib/Support/JSON.cpp (original)
+++ llvm/trunk/lib/Support/JSON.cpp Thu Apr 25 05:51:42 2019
@@ -560,9 +560,6 @@ std::string fixUTF8(llvm::StringRef S) {
   return Res;
 }
 
-} // namespace json
-} // namespace llvm
-
 static void quote(llvm::raw_ostream &OS, llvm::StringRef S) {
   OS << '\"';
   for (unsigned char C : S) {
@@ -593,106 +590,129 @@ static void quote(llvm::raw_ostream &OS,
   OS << '\"';
 }
 
-enum IndenterAction {
-  Indent,
-  Outdent,
-  Newline,
-  Space,
-};
-
-// Prints JSON. The indenter can be used to control formatting.
-template <typename Indenter>
-void llvm::json::Value::print(raw_ostream &OS, const Indenter &I) const {
-  switch (Type) {
-  case T_Null:
+void llvm::json::OStream::value(const Value &V) {
+  switch (V.kind()) {
+  case Value::Null:
+    valueBegin();
     OS << "null";
-    break;
-  case T_Boolean:
-    OS << (as<bool>() ? "true" : "false");
-    break;
-  case T_Double:
-    OS << format("%.*g", std::numeric_limits<double>::max_digits10,
-                 as<double>());
-    break;
-  case T_Integer:
-    OS << as<int64_t>();
-    break;
-  case T_StringRef:
-    quote(OS, as<StringRef>());
-    break;
-  case T_String:
-    quote(OS, as<std::string>());
-    break;
-  case T_Object: {
-    bool Comma = false;
-    OS << '{';
-    I(Indent);
-    for (const auto *P : sortedElements(as<json::Object>())) {
-      if (Comma)
-        OS << ',';
-      Comma = true;
-      I(Newline);
-      quote(OS, P->first);
-      OS << ':';
-      I(Space);
-      P->second.print(OS, I);
-    }
-    I(Outdent);
-    if (Comma)
-      I(Newline);
-    OS << '}';
-    break;
+    return;
+  case Value::Boolean:
+    valueBegin();
+    OS << (*V.getAsBoolean() ? "true" : "false");
+    return;
+  case Value::Number:
+    valueBegin();
+    if (V.Type == Value::T_Integer)
+      OS << *V.getAsInteger();
+    else
+      OS << format("%.*g", std::numeric_limits<double>::max_digits10,
+                   *V.getAsNumber());
+    return;
+  case Value::String:
+    valueBegin();
+    quote(OS, *V.getAsString());
+    return;
+  case Value::Array:
+    return array([&] {
+      for (const Value &E : *V.getAsArray())
+        value(E);
+    });
+  case Value::Object:
+    return object([&] {
+      for (const Object::value_type *E : sortedElements(*V.getAsObject()))
+        attribute(E->first, E->second);
+    });
   }
-  case T_Array: {
-    bool Comma = false;
-    OS << '[';
-    I(Indent);
-    for (const auto &E : as<json::Array>()) {
-      if (Comma)
-        OS << ',';
-      Comma = true;
-      I(Newline);
-      E.print(OS, I);
-    }
-    I(Outdent);
-    if (Comma)
-      I(Newline);
-    OS << ']';
-    break;
+}
+
+void llvm::json::OStream::valueBegin() {
+  assert(Stack.back().Ctx != Object && "Only attributes allowed here");
+  if (Stack.back().HasValue) {
+    assert(Stack.back().Ctx != Singleton && "Only one value allowed here");
+    OS << ',';
+  }
+  if (Stack.back().Ctx == Array)
+    newline();
+  Stack.back().HasValue = true;
+}
+
+void llvm::json::OStream::newline() {
+  if (IndentSize) {
+    OS.write('\n');
+    OS.indent(Indent);
   }
+}
+
+void llvm::json::OStream::arrayBegin() {
+  valueBegin();
+  Stack.emplace_back();
+  Stack.back().Ctx = Array;
+  Indent += IndentSize;
+  OS << '[';
+}
+
+void llvm::json::OStream::arrayEnd() {
+  assert(Stack.back().Ctx == Array);
+  Indent -= IndentSize;
+  if (Stack.back().HasValue)
+    newline();
+  OS << ']';
+  Stack.pop_back();
+  assert(!Stack.empty());
+}
+
+void llvm::json::OStream::objectBegin() {
+  valueBegin();
+  Stack.emplace_back();
+  Stack.back().Ctx = Object;
+  Indent += IndentSize;
+  OS << '{';
+}
+
+void llvm::json::OStream::objectEnd() {
+  assert(Stack.back().Ctx == Object);
+  Indent -= IndentSize;
+  if (Stack.back().HasValue)
+    newline();
+  OS << '}';
+  Stack.pop_back();
+  assert(!Stack.empty());
+}
+
+void llvm::json::OStream::attributeBegin(llvm::StringRef Key) {
+  assert(Stack.back().Ctx == Object);
+  if (Stack.back().HasValue)
+    OS << ',';
+  newline();
+  Stack.back().HasValue = true;
+  Stack.emplace_back();
+  Stack.back().Ctx = Singleton;
+  if (LLVM_LIKELY(isUTF8(Key))) {
+    quote(OS, Key);
+  } else {
+    assert(false && "Invalid UTF-8 in attribute key");
+    quote(OS, fixUTF8(Key));
   }
+  OS.write(':');
+  if (IndentSize)
+    OS.write(' ');
+}
+
+void llvm::json::OStream::attributeEnd() {
+  assert(Stack.back().Ctx == Singleton);
+  assert(Stack.back().HasValue && "Attribute must have a value");
+  Stack.pop_back();
+  assert(Stack.back().Ctx == Object);
 }
 
+} // namespace json
+} // namespace llvm
+
 void llvm::format_provider<llvm::json::Value>::format(
     const llvm::json::Value &E, raw_ostream &OS, StringRef Options) {
-  if (Options.empty()) {
-    OS << E;
-    return;
-  }
   unsigned IndentAmount = 0;
-  if (Options.getAsInteger(/*Radix=*/10, IndentAmount))
+  if (!Options.empty() && Options.getAsInteger(/*Radix=*/10, IndentAmount))
     llvm_unreachable("json::Value format options should be an integer");
-  unsigned IndentLevel = 0;
-  E.print(OS, [&](IndenterAction A) {
-    switch (A) {
-    case Newline:
-      OS << '\n';
-      OS.indent(IndentLevel);
-      break;
-    case Space:
-      OS << ' ';
-      break;
-    case Indent:
-      IndentLevel += IndentAmount;
-      break;
-    case Outdent:
-      IndentLevel -= IndentAmount;
-      break;
-    };
-  });
+  json::OStream(OS, IndentAmount).value(E);
 }
 
-llvm::raw_ostream &llvm::json::operator<<(raw_ostream &OS, const Value &E) {
-  E.print(OS, [](IndenterAction A) { /*ignore*/ });
-  return OS;
-}

Modified: llvm/trunk/lib/Support/TimeProfiler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TimeProfiler.cpp?rev=359186&r1=359185&r2=359186&view=diff
==============================================================================
--- llvm/trunk/lib/Support/TimeProfiler.cpp (original)
+++ llvm/trunk/lib/Support/TimeProfiler.cpp Thu Apr 25 05:51:42 2019
@@ -87,25 +87,24 @@ struct TimeTraceProfiler {
   void Write(raw_pwrite_stream &OS) {
     assert(Stack.empty() &&
            "All profiler sections should be ended when calling Write");
-
-    json::Array Events;
-    const size_t ExpectedEntryCount =
-        Entries.size() + CountAndTotalPerName.size() + 1;
-    Events.reserve(ExpectedEntryCount);
+    json::OStream J(OS);
+    J.objectBegin();
+    J.attributeBegin("traceEvents");
+    J.arrayBegin();
 
     // Emit all events for the main flame graph.
     for (const auto &E : Entries) {
       auto StartUs = duration_cast<microseconds>(E.Start - StartTime).count();
       auto DurUs = duration_cast<microseconds>(E.Duration).count();
 
-      Events.emplace_back(json::Object{
-          {"pid", 1},
-          {"tid", 0},
-          {"ph", "X"},
-          {"ts", StartUs},
-          {"dur", DurUs},
-          {"name", E.Name},
-          {"args", json::Object{{"detail", E.Detail}}},
+      J.object([&]{
+        J.attribute("pid", 1);
+        J.attribute("tid", 0);
+        J.attribute("ph", "X");
+        J.attribute("ts", StartUs);
+        J.attribute("dur", DurUs);
+        J.attribute("name", E.Name);
+        J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
       });
     }
 
@@ -126,36 +125,36 @@ struct TimeTraceProfiler {
       auto DurUs = duration_cast<microseconds>(E.second.second).count();
       auto Count = CountAndTotalPerName[E.first].first;
 
-      Events.emplace_back(json::Object{
-          {"pid", 1},
-          {"tid", Tid},
-          {"ph", "X"},
-          {"ts", 0},
-          {"dur", DurUs},
-          {"name", "Total " + E.first},
-          {"args", json::Object{{"count", static_cast<int64_t>(Count)},
-                                {"avg ms",
-                                 static_cast<int64_t>(DurUs / Count / 1000)}}},
+      J.object([&]{
+        J.attribute("pid", 1);
+        J.attribute("tid", Tid);
+        J.attribute("ph", "X");
+        J.attribute("ts", 0);
+        J.attribute("dur", DurUs);
+        J.attribute("name", "Total " + E.first);
+        J.attributeObject("args", [&] {
+          J.attribute("count", int64_t(Count));
+          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
+        });
       });
 
       ++Tid;
     }
 
     // Emit metadata event with process name.
-    Events.emplace_back(json::Object{
-        {"cat", ""},
-        {"pid", 1},
-        {"tid", 0},
-        {"ts", 0},
-        {"ph", "M"},
-        {"name", "process_name"},
-        {"args", json::Object{{"name", "clang"}}},
+    J.object([&] {
+      J.attribute("cat", "");
+      J.attribute("pid", 1);
+      J.attribute("tid", 0);
+      J.attribute("ts", 0);
+      J.attribute("ph", "M");
+      J.attribute("name", "process_name");
+      J.attributeObject("args", [&] { J.attribute("name", "clang"); });
     });
 
-    assert(Events.size() == ExpectedEntryCount && "Size prediction failed!");
-
-    OS << formatv("{0:2}", json::Value(json::Object(
-                               {{"traceEvents", std::move(Events)}})));
+    J.arrayEnd();
+    J.attributeEnd();
+    J.objectEnd();
   }
 
   SmallVector<Entry, 16> Stack;

Modified: llvm/trunk/unittests/Support/JSONTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/JSONTest.cpp?rev=359186&r1=359185&r2=359186&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/JSONTest.cpp (original)
+++ llvm/trunk/unittests/Support/JSONTest.cpp Thu Apr 25 05:51:42 2019
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/JSON.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -383,6 +384,44 @@ TEST(JSONTest, Deserialize) {
       << "Wrong type for Optional<T> " << V;
 }
 
+TEST(JSONTest, Stream) {
+  auto StreamStuff = [](unsigned Indent) {
+    std::string S;
+    llvm::raw_string_ostream OS(S);
+    OStream J(OS, Indent);
+    J.object([&] {
+      J.attributeArray("foo", [&] {
+        J.value(nullptr);
+        J.value(42.5);
+        J.arrayBegin();
+        J.value(43);
+        J.arrayEnd();
+      });
+      J.attributeBegin("bar");
+      J.objectBegin();
+      J.objectEnd();
+      J.attributeEnd();
+      J.attribute("baz", "xyz");
+    });
+    return OS.str();
+  };
+
+  const char *Plain = R"({"foo":[null,42.5,[43]],"bar":{},"baz":"xyz"})";
+  EXPECT_EQ(Plain, StreamStuff(0));
+  const char *Pretty = R"({
+  "foo": [
+    null,
+    42.5,
+    [
+      43
+    ]
+  ],
+  "bar": {},
+  "baz": "xyz"
+})";
+  EXPECT_EQ(Pretty, StreamStuff(2));
+}
+
 } // namespace
 } // namespace json
 } // namespace llvm




More information about the llvm-commits mailing list