[llvm] e79d002 - [MsgPack] MsgPackDocument::readFromBlob now merges

Tim Renouf via llvm-commits llvm-commits at lists.llvm.org
Thu May 21 13:26:59 PDT 2020


Author: Tim Renouf
Date: 2020-05-21T21:26:26+01:00
New Revision: e79d002309cb52b12f9bf4e205ffc9805e2223d7

URL: https://github.com/llvm/llvm-project/commit/e79d002309cb52b12f9bf4e205ffc9805e2223d7
DIFF: https://github.com/llvm/llvm-project/commit/e79d002309cb52b12f9bf4e205ffc9805e2223d7.diff

LOG: [MsgPack] MsgPackDocument::readFromBlob now merges

The readFromBlob method can now be used to read MsgPack into a Document
that already contains something, merging the two. There is a new Merger
argument to readFromBlob, a callback function to resolve conflicts.

Differential Revision: https://reviews.llvm.org/D79671

Change-Id: Icf3e959217fe33cd907a41516c0386aef2847c0c

Added: 
    

Modified: 
    llvm/include/llvm/BinaryFormat/MsgPackDocument.h
    llvm/include/llvm/BinaryFormat/MsgPackReader.h
    llvm/lib/BinaryFormat/MsgPackDocument.cpp
    llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
index 824ecc353207..15be6d5922cf 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -62,6 +62,8 @@ class DocNode {
   };
 
 public:
+  // Default constructor gives an empty node with no associated Document. All
+  // you can do with it is "isEmpty()".
   DocNode() : KindAndDoc(nullptr) {}
 
   // Type methods
@@ -70,8 +72,10 @@ class DocNode {
   bool isScalar() const { return !isMap() && !isArray(); }
   bool isString() const { return getKind() == Type::String; }
 
-  // Accessors
-  bool isEmpty() const { return !KindAndDoc; }
+  // Accessors. isEmpty() returns true for both a default-constructed DocNode
+  // that has no associated Document, and the result of getEmptyNode(), which
+  // does have an associated document.
+  bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
   Type getKind() const { return KindAndDoc->Kind; }
   Document *getDocument() const { return KindAndDoc->Doc; }
 
@@ -146,10 +150,10 @@ class DocNode {
   friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
     // This has to cope with one or both of the nodes being default-constructed,
     // such that KindAndDoc is not set.
+    if (Rhs.isEmpty())
+      return false;
     if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
-      if (!Rhs.KindAndDoc)
-        return false;
-      if (!Lhs.KindAndDoc)
+      if (Lhs.isEmpty())
         return true;
       return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
     }
@@ -222,14 +226,15 @@ class ArrayDocNode : public DocNode {
   // Array access methods.
   size_t size() const { return Array->size(); }
   bool empty() const { return !size(); }
+  DocNode &back() const { return Array->back(); }
   ArrayTy::iterator begin() { return Array->begin(); }
   ArrayTy::iterator end() { return Array->end(); }
   void push_back(DocNode N) {
-    assert(N.getDocument() == getDocument());
+    assert(N.isEmpty() || N.getDocument() == getDocument());
     Array->push_back(N);
   }
 
-  /// Element access. This extends the array if necessary.
+  /// Element access. This extends the array if necessary, with empty nodes.
   DocNode &operator[](size_t Index);
 };
 
@@ -247,7 +252,7 @@ class Document {
   DocNode Root;
 
   // The KindAndDocument structs pointed to by nodes in the document.
-  KindAndDocument KindAndDocs[size_t(Type::Extension) + 1];
+  KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
 
   // Whether YAML output uses hex for UInt.
   bool HexMode = false;
@@ -255,7 +260,7 @@ class Document {
 public:
   Document() {
     clear();
-    for (unsigned T = 0; T != size_t(Type::Extension) + 1; ++T)
+    for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
       KindAndDocs[T] = {this, Type(T)};
   }
 
@@ -263,7 +268,13 @@ class Document {
   DocNode &getRoot() { return Root; }
 
   /// Restore the Document to an empty state.
-  void clear() { getRoot() = getNode(); }
+  void clear() { getRoot() = getEmptyNode(); }
+
+  /// Create an empty node associated with this Document.
+  DocNode getEmptyNode() {
+    auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
+    return N;
+  }
 
   /// Create a nil node associated with this Document.
   DocNode getNode() {
@@ -345,15 +356,35 @@ class Document {
     return N.getArray();
   }
 
-  /// Read a MsgPack document from a binary MsgPack blob.
-  /// The blob data must remain valid for the lifetime of this Document (because
-  /// a string object in the document contains a StringRef into the original
-  /// blob).
-  /// If Multi, then this sets root to an array and adds top-level objects to
-  /// it. If !Multi, then it only reads a single top-level object, even if there
-  /// are more, and sets root to that.
-  /// Returns false if failed due to illegal format.
-  bool readFromBlob(StringRef Blob, bool Multi);
+  /// Read a document from a binary msgpack blob, merging into anything already
+  /// in the Document. The blob data must remain valid for the lifetime of this
+  /// Document (because a string object in the document contains a StringRef
+  /// into the original blob). If Multi, then this sets root to an array and
+  /// adds top-level objects to it. If !Multi, then it only reads a single
+  /// top-level object, even if there are more, and sets root to that. Returns
+  /// false if failed due to illegal format or merge error.
+  ///
+  /// The Merger arg is a callback function that is called when the merge has a
+  /// conflict, that is, it is trying to set an item that is already set. If the
+  /// conflict cannot be resolved, the callback function returns -1. If the
+  /// conflict can be resolved, the callback returns a non-negative number and
+  /// sets *DestNode to the resolved node. The returned non-negative number is
+  /// significant only for an array node; it is then the array index to start
+  /// populating at. That allows Merger to choose whether to merge array
+  /// elements (returns 0) or append new elements (returns existing size).
+  ///
+  /// If SrcNode is an array or map, the resolution must be that *DestNode is an
+  /// array or map respectively, although it could be the array or map
+  /// (respectively) that was already there. MapKey is the key if *DestNode is a
+  /// map entry, a nil node otherwise.
+  ///
+  /// The default for Merger is to disallow any conflict.
+  bool readFromBlob(
+      StringRef Blob, bool Multi,
+      function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
+          Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+            return -1;
+          });
 
   /// Write a MsgPack document to a binary MsgPack blob.
   void writeToBlob(std::string &Blob);

diff  --git a/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
index bd760f7d7c87..a6ae542637fd 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackReader.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -57,6 +57,7 @@ enum class Type : uint8_t {
   Array,
   Map,
   Extension,
+  Empty, // Used by MsgPackDocument to represent an empty node
 };
 
 /// Extension types are composed of a user-defined type ID and an uninterpreted

diff  --git a/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
index e12c54a37ad0..a7e43a8b1538 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocument.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
@@ -40,46 +40,56 @@ DocNode &MapDocNode::operator[](StringRef S) {
 /// Member access for MapDocNode.
 DocNode &MapDocNode::operator[](DocNode Key) {
   assert(!Key.isEmpty());
-  MapTy::value_type Entry(Key, DocNode());
-  auto ItAndInserted = Map->insert(Entry);
-  if (ItAndInserted.second) {
+  DocNode &N = (*Map)[Key];
+  if (N.isEmpty()) {
     // Ensure a new element has its KindAndDoc initialized.
-    ItAndInserted.first->second = getDocument()->getNode();
+    N = getDocument()->getEmptyNode();
   }
-  return ItAndInserted.first->second;
+  return N;
 }
 
 /// Array element access. This extends the array if necessary.
 DocNode &ArrayDocNode::operator[](size_t Index) {
   if (size() <= Index) {
     // Ensure new elements have their KindAndDoc initialized.
-    Array->resize(Index + 1, getDocument()->getNode());
+    Array->resize(Index + 1, getDocument()->getEmptyNode());
   }
   return (*Array)[Index];
 }
 
 // A level in the document reading stack.
 struct StackLevel {
+  StackLevel(DocNode Node, size_t StartIndex, size_t Length,
+             DocNode *MapEntry = nullptr)
+      : Node(Node), Index(StartIndex), End(StartIndex + Length),
+        MapEntry(MapEntry) {}
   DocNode Node;
-  size_t Length;
+  size_t Index;
+  size_t End;
   // Points to map entry when we have just processed a map key.
   DocNode *MapEntry;
+  DocNode MapKey;
 };
 
-// Read a document from a binary msgpack blob.
+// Read a document from a binary msgpack blob, merging into anything already in
+// the Document.
 // The blob data must remain valid for the lifetime of this Document (because a
 // string object in the document contains a StringRef into the original blob).
 // If Multi, then this sets root to an array and adds top-level objects to it.
 // If !Multi, then it only reads a single top-level object, even if there are
 // more, and sets root to that.
-// Returns false if failed due to illegal format.
-bool Document::readFromBlob(StringRef Blob, bool Multi) {
+// Returns false if failed due to illegal format or merge error.
+
+bool Document::readFromBlob(
+    StringRef Blob, bool Multi,
+    function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
+        Merger) {
   msgpack::Reader MPReader(Blob);
   SmallVector<StackLevel, 4> Stack;
   if (Multi) {
     // Create the array for multiple top-level objects.
     Root = getArrayNode();
-    Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
+    Stack.push_back(StackLevel(Root, 0, (size_t)-1));
   }
   do {
     // On to next element (or key if doing a map key next).
@@ -124,29 +134,47 @@ bool Document::readFromBlob(StringRef Blob, bool Multi) {
     }
 
     // Store it.
+    DocNode *DestNode = nullptr;
     if (Stack.empty())
-      Root = Node;
+      DestNode = &Root;
     else if (Stack.back().Node.getKind() == Type::Array) {
       // Reading an array entry.
       auto &Array = Stack.back().Node.getArray();
-      Array.push_back(Node);
+      DestNode = &Array[Stack.back().Index++];
     } else {
       auto &Map = Stack.back().Node.getMap();
       if (!Stack.back().MapEntry) {
         // Reading a map key.
+        Stack.back().MapKey = Node;
         Stack.back().MapEntry = &Map[Node];
-      } else {
-        // Reading the value for the map key read in the last iteration.
-        *Stack.back().MapEntry = Node;
-        Stack.back().MapEntry = nullptr;
+        continue;
       }
+      // Reading the value for the map key read in the last iteration.
+      DestNode = Stack.back().MapEntry;
+      Stack.back().MapEntry = nullptr;
+      ++Stack.back().Index;
     }
+    int MergeResult = 0;
+    if (!DestNode->isEmpty()) {
+      // In a merge, there is already a value at this position. Call the
+      // callback to attempt to resolve the conflict. The resolution must result
+      // in an array or map if Node is an array or map respectively.
+      DocNode MapKey = !Stack.empty() && !Stack.back().MapKey.isEmpty()
+                           ? Stack.back().MapKey
+                           : getNode();
+      MergeResult = Merger(DestNode, Node, MapKey);
+      if (MergeResult < 0)
+        return false; // Merge conflict resolution failed
+      assert(!((Node.isMap() && !DestNode->isMap()) ||
+               (Node.isArray() && !DestNode->isArray())));
+    } else
+      *DestNode = Node;
 
     // See if we're starting a new array or map.
-    switch (Node.getKind()) {
+    switch (DestNode->getKind()) {
     case msgpack::Type::Array:
     case msgpack::Type::Map:
-      Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
+      Stack.push_back(StackLevel(*DestNode, MergeResult, Obj.Length, nullptr));
       break;
     default:
       break;
@@ -154,14 +182,10 @@ bool Document::readFromBlob(StringRef Blob, bool Multi) {
 
     // Pop finished stack levels.
     while (!Stack.empty()) {
-      if (Stack.back().Node.getKind() == msgpack::Type::Array) {
-        if (Stack.back().Node.getArray().size() != Stack.back().Length)
-          break;
-      } else {
-        if (Stack.back().MapEntry ||
-            Stack.back().Node.getMap().size() != Stack.back().Length)
-          break;
-      }
+      if (Stack.back().MapEntry)
+        break;
+      if (Stack.back().Index != Stack.back().End)
+        break;
       Stack.pop_back();
     }
   } while (!Stack.empty());

diff  --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
index 00b157963f68..709612009a97 100644
--- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
+++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
@@ -21,7 +21,7 @@ TEST(MsgPackDocument, TestReadInt) {
   ASSERT_EQ(Doc.getRoot().getInt(), 0);
 }
 
-TEST(MsgPackDocument, TestReadArray) {
+TEST(MsgPackDocument, TestReadMergeArray) {
   Document Doc;
   bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false);
   ASSERT_TRUE(Ok);
@@ -33,9 +33,64 @@ TEST(MsgPackDocument, TestReadArray) {
   ASSERT_EQ(SI.getInt(), 1);
   auto SN = A[1];
   ASSERT_EQ(SN.getKind(), Type::Nil);
+
+  Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false,
+                        [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+                          // Allow array, merging into existing elements, ORing
+                          // ints.
+                          if (DestNode->getKind() == Type::Int &&
+                              SrcNode.getKind() == Type::Int) {
+                            *DestNode = DestNode->getDocument()->getNode(
+                                DestNode->getInt() | SrcNode.getInt());
+                            return 0;
+                          }
+                          return DestNode->isArray() && SrcNode.isArray() ? 0
+                                                                          : -1;
+                        });
+  ASSERT_TRUE(Ok);
+  A = Doc.getRoot().getArray();
+  ASSERT_EQ(A.size(), 2u);
+  SI = A[0];
+  ASSERT_EQ(SI.getKind(), Type::Int);
+  ASSERT_EQ(SI.getInt(), 43);
+  SN = A[1];
+  ASSERT_EQ(SN.getKind(), Type::Nil);
+}
+
+TEST(MsgPackDocument, TestReadAppendArray) {
+  Document Doc;
+  bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false);
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
+  auto A = Doc.getRoot().getArray();
+  ASSERT_EQ(A.size(), 2u);
+  auto SI = A[0];
+  ASSERT_EQ(SI.getKind(), Type::Int);
+  ASSERT_EQ(SI.getInt(), 1);
+  auto SN = A[1];
+  ASSERT_EQ(SN.getKind(), Type::Nil);
+
+  Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false,
+                        [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+                          // Allow array, appending after existing elements
+                          return DestNode->isArray() && SrcNode.isArray()
+                                     ? DestNode->getArray().size()
+                                     : -1;
+                        });
+  ASSERT_TRUE(Ok);
+  A = Doc.getRoot().getArray();
+  ASSERT_EQ(A.size(), 3u);
+  SI = A[0];
+  ASSERT_EQ(SI.getKind(), Type::Int);
+  ASSERT_EQ(SI.getInt(), 1);
+  SN = A[1];
+  ASSERT_EQ(SN.getKind(), Type::Nil);
+  SI = A[2];
+  ASSERT_EQ(SI.getKind(), Type::Int);
+  ASSERT_EQ(SI.getInt(), 42);
 }
 
-TEST(MsgPackDocument, TestReadMap) {
+TEST(MsgPackDocument, TestReadMergeMap) {
   Document Doc;
   bool Ok = Doc.readFromBlob(StringRef("\x82\xa3"
                                        "foo"
@@ -53,6 +108,68 @@ TEST(MsgPackDocument, TestReadMap) {
   auto BarS = M["bar"];
   ASSERT_EQ(BarS.getKind(), Type::Int);
   ASSERT_EQ(BarS.getInt(), 2);
+
+  Ok = Doc.readFromBlob(StringRef("\x82\xa3"
+                                  "foz"
+                                  "\xd0\x03\xa3"
+                                  "baz"
+                                  "\xd0\x04"),
+                        /*Multi=*/false,
+                        [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+                          return DestNode->isMap() && SrcNode.isMap() ? 0 : -1;
+                        });
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(M.size(), 4u);
+  FooS = M["foo"];
+  ASSERT_EQ(FooS.getKind(), Type::Int);
+  ASSERT_EQ(FooS.getInt(), 1);
+  BarS = M["bar"];
+  ASSERT_EQ(BarS.getKind(), Type::Int);
+  ASSERT_EQ(BarS.getInt(), 2);
+  auto FozS = M["foz"];
+  ASSERT_EQ(FozS.getKind(), Type::Int);
+  ASSERT_EQ(FozS.getInt(), 3);
+  auto BazS = M["baz"];
+  ASSERT_EQ(BazS.getKind(), Type::Int);
+  ASSERT_EQ(BazS.getInt(), 4);
+
+  Ok = Doc.readFromBlob(
+      StringRef("\x82\xa3"
+                "foz"
+                "\xd0\x06\xa3"
+                "bay"
+                "\xd0\x08"),
+      /*Multi=*/false, [](DocNode *Dest, DocNode Src, DocNode MapKey) {
+        // Merger function that merges two ints by ORing their values, as long
+        // as the map key is "foz".
+        if (Src.isMap())
+          return Dest->isMap();
+        if (Src.isArray())
+          return Dest->isArray();
+        if (MapKey.isString() && MapKey.getString() == "foz" &&
+            Dest->getKind() == Type::Int && Src.getKind() == Type::Int) {
+          *Dest = Src.getDocument()->getNode(Dest->getInt() | Src.getInt());
+          return true;
+        }
+        return false;
+      });
+  ASSERT_TRUE(Ok);
+  ASSERT_EQ(M.size(), 5u);
+  FooS = M["foo"];
+  ASSERT_EQ(FooS.getKind(), Type::Int);
+  ASSERT_EQ(FooS.getInt(), 1);
+  BarS = M["bar"];
+  ASSERT_EQ(BarS.getKind(), Type::Int);
+  ASSERT_EQ(BarS.getInt(), 2);
+  FozS = M["foz"];
+  ASSERT_EQ(FozS.getKind(), Type::Int);
+  ASSERT_EQ(FozS.getInt(), 7);
+  BazS = M["baz"];
+  ASSERT_EQ(BazS.getKind(), Type::Int);
+  ASSERT_EQ(BazS.getInt(), 4);
+  auto BayS = M["bay"];
+  ASSERT_EQ(BayS.getKind(), Type::Int);
+  ASSERT_EQ(BayS.getInt(), 8);
 }
 
 TEST(MsgPackDocument, TestWriteInt) {


        


More information about the llvm-commits mailing list