[llvm] 16619e7 - [JSON] Facility to track position within an object and report errors.

Sam McCall via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 15:10:18 PDT 2020


Author: Sam McCall
Date: 2020-09-24T00:09:09+02:00
New Revision: 16619e7139bdcb0021598ba76cb5cf30ac669dbb

URL: https://github.com/llvm/llvm-project/commit/16619e7139bdcb0021598ba76cb5cf30ac669dbb
DIFF: https://github.com/llvm/llvm-project/commit/16619e7139bdcb0021598ba76cb5cf30ac669dbb.diff

LOG: [JSON] Facility to track position within an object and report errors.

This error model should be rich enough for most applications. It comprises:

- a name for the root object, so the user knows what we're parsing
- a path from the root object to the JSON node most associated with the error
- a local error message

This can be presented as an llvm::Error, e.g.:
  "expected string at ConfigFile.credentials[0].username"

It's designed to be cheap: Paths form a linked list of lightweight
objects on the stack, and no heap allocation occurs unless an error is reported.

A subsequent commit will make use of this in the JSON-to-object
translation facilities: fromJSON and ObjectMapper.
However, it is independent of these and can be used on its own, e.g. for validation.

Another subsequent commit will support showing the error in its context
within the parsed value.

Differential Revision: https://reviews.llvm.org/D88103

Added: 
    

Modified: 
    llvm/include/llvm/Support/JSON.h
    llvm/lib/Support/JSON.cpp
    llvm/unittests/Support/JSONTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index 80fc1ee238e7..e98941d5390f 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -557,6 +557,75 @@ inline bool Object::erase(StringRef K) {
   return M.erase(ObjectKey(K));
 }
 
+/// A "cursor" marking a position within a Value.
+/// The Value is a tree, and this is the path from the root to the current node.
+/// This is used to associate errors with particular subobjects.
+class Path {
+public:
+  class Root;
+
+  /// Records that the value at the current path is invalid.
+  /// Message is e.g. "expected number" and becomes part of the final error.
+  /// This overwrites any previously written error message in the root.
+  void report(llvm::StringLiteral Message);
+
+  /// The root may be treated as a Path.
+  Path(Root &R) : Parent(nullptr), Seg(&R) {}
+  /// Derives a path for an array element: this[Index]
+  Path index(unsigned Index) const { return Path(this, Segment(Index)); }
+  /// Derives a path for an object field: this.Field
+  Path field(StringRef Field) const { return Path(this, Segment(Field)); }
+
+private:
+  /// One element in a JSON path: an object field (.foo) or array index [27].
+  /// Exception: the root Path encodes a pointer to the Path::Root.
+  class Segment {
+    uintptr_t Pointer;
+    unsigned Offset;
+
+  public:
+    Segment() = default;
+    Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {}
+    Segment(llvm::StringRef Field)
+        : Pointer(reinterpret_cast<uintptr_t>(Field.data())),
+          Offset(static_cast<unsigned>(Field.size())) {}
+    Segment(unsigned Index) : Pointer(0), Offset(Index) {}
+
+    bool isField() const { return Pointer != 0; }
+    StringRef field() const {
+      return StringRef(reinterpret_cast<const char *>(Pointer), Offset);
+    }
+    unsigned index() const { return Offset; }
+    Root *root() const { return reinterpret_cast<Root *>(Pointer); }
+  };
+
+  const Path *Parent;
+  Segment Seg;
+
+  Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {}
+};
+
+/// The root is the trivial Path to the root value.
+/// It also stores the latest reported error and the path where it occurred.
+class Path::Root {
+  llvm::StringRef Name;
+  llvm::StringLiteral ErrorMessage;
+  std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed.
+
+  friend void Path::report(llvm::StringLiteral Message);
+
+public:
+  Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {}
+  // No copy/move allowed as there are incoming pointers.
+  Root(Root &&) = delete;
+  Root &operator=(Root &&) = delete;
+  Root(const Root &) = delete;
+  Root &operator=(const Root &) = delete;
+
+  /// Returns the last error reported, or else a generic error.
+  Error getError() const;
+};
+
 // Standard deserializers are provided for primitive types.
 // See comments on Value.
 inline bool fromJSON(const Value &E, std::string &Out) {

diff  --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index db4121cf82bc..f7c51a4ce3aa 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -7,7 +7,9 @@
 //===---------------------------------------------------------------------===//
 
 #include "llvm/Support/JSON.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cctype>
@@ -199,6 +201,40 @@ bool operator==(const Value &L, const Value &R) {
   llvm_unreachable("Unknown value kind");
 }
 
+void Path::report(llvm::StringLiteral Msg) {
+  // Walk up to the root context, and count the number of segments.
+  unsigned Count = 0;
+  const Path *P;
+  for (P = this; P->Parent != nullptr; P = P->Parent)
+    ++Count;
+  Path::Root *R = P->Seg.root();
+  // Fill in the error message and copy the path (in reverse order).
+  R->ErrorMessage = Msg;
+  R->ErrorPath.resize(Count);
+  auto It = R->ErrorPath.begin();
+  for (P = this; P->Parent != nullptr; P = P->Parent)
+    *It++ = P->Seg;
+}
+
+Error Path::Root::getError() const {
+  std::string S;
+  raw_string_ostream OS(S);
+  OS << (ErrorMessage.empty() ? "invalid JSON contents" : ErrorMessage);
+  if (ErrorPath.empty()) {
+    if (!Name.empty())
+      OS << " when parsing " << Name;
+  } else {
+    OS << " at " << (Name.empty() ? "(root)" : Name);
+    for (const Path::Segment &S : llvm::reverse(ErrorPath)) {
+      if (S.isField())
+        OS << '.' << S.field();
+      else
+        OS << '[' << S.index() << ']';
+    }
+  }
+  return createStringError(llvm::inconvertibleErrorCode(), OS.str());
+}
+
 namespace {
 // Simple recursive-descent JSON parser.
 class Parser {

diff  --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp
index 986cf5e73a37..8c7b470756e3 100644
--- a/llvm/unittests/Support/JSONTest.cpp
+++ b/llvm/unittests/Support/JSONTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Error.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -461,6 +462,17 @@ TEST(JSONTest, Stream) {
   EXPECT_EQ(Pretty, StreamStuff(2));
 }
 
+TEST(JSONTest, Path) {
+  Path::Root R("foo");
+  Path P = R, A = P.field("a"), B = P.field("b");
+  A.index(1).field("c").index(2).report("boom");
+  EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("boom at foo.a[1].c[2]"));
+  B.field("d").field("e").report("bam");
+  EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("bam at foo.b.d.e"));
+  P.report("oh no");
+  EXPECT_THAT_ERROR(R.getError(), FailedWithMessage("oh no when parsing foo"));
+}
+
 } // namespace
 } // namespace json
 } // namespace llvm


        


More information about the llvm-commits mailing list