[llvm] e79d002 - [MsgPack] MsgPackDocument::readFromBlob now merges
Tim Renouf via llvm-commits
llvm-commits at lists.llvm.org
Thu May 21 13:26:59 PDT 2020
Author: Tim Renouf
Date: 2020-05-21T21:26:26+01:00
New Revision: e79d002309cb52b12f9bf4e205ffc9805e2223d7
URL: https://github.com/llvm/llvm-project/commit/e79d002309cb52b12f9bf4e205ffc9805e2223d7
DIFF: https://github.com/llvm/llvm-project/commit/e79d002309cb52b12f9bf4e205ffc9805e2223d7.diff
LOG: [MsgPack] MsgPackDocument::readFromBlob now merges
The readFromBlob method can now be used to read MsgPack into a Document
that already contains something, merging the two. There is a new Merger
argument to readFromBlob, a callback function to resolve conflicts.
Differential Revision: https://reviews.llvm.org/D79671
Change-Id: Icf3e959217fe33cd907a41516c0386aef2847c0c
Added:
Modified:
llvm/include/llvm/BinaryFormat/MsgPackDocument.h
llvm/include/llvm/BinaryFormat/MsgPackReader.h
llvm/lib/BinaryFormat/MsgPackDocument.cpp
llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
index 824ecc353207..15be6d5922cf 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -62,6 +62,8 @@ class DocNode {
};
public:
+ // Default constructor gives an empty node with no associated Document. All
+ // you can do with it is "isEmpty()".
DocNode() : KindAndDoc(nullptr) {}
// Type methods
@@ -70,8 +72,10 @@ class DocNode {
bool isScalar() const { return !isMap() && !isArray(); }
bool isString() const { return getKind() == Type::String; }
- // Accessors
- bool isEmpty() const { return !KindAndDoc; }
+ // Accessors. isEmpty() returns true for both a default-constructed DocNode
+ // that has no associated Document, and the result of getEmptyNode(), which
+ // does have an associated document.
+ bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
Type getKind() const { return KindAndDoc->Kind; }
Document *getDocument() const { return KindAndDoc->Doc; }
@@ -146,10 +150,10 @@ class DocNode {
friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
// This has to cope with one or both of the nodes being default-constructed,
// such that KindAndDoc is not set.
+ if (Rhs.isEmpty())
+ return false;
if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
- if (!Rhs.KindAndDoc)
- return false;
- if (!Lhs.KindAndDoc)
+ if (Lhs.isEmpty())
return true;
return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
}
@@ -222,14 +226,15 @@ class ArrayDocNode : public DocNode {
// Array access methods.
size_t size() const { return Array->size(); }
bool empty() const { return !size(); }
+ DocNode &back() const { return Array->back(); }
ArrayTy::iterator begin() { return Array->begin(); }
ArrayTy::iterator end() { return Array->end(); }
void push_back(DocNode N) {
- assert(N.getDocument() == getDocument());
+ assert(N.isEmpty() || N.getDocument() == getDocument());
Array->push_back(N);
}
- /// Element access. This extends the array if necessary.
+ /// Element access. This extends the array if necessary, with empty nodes.
DocNode &operator[](size_t Index);
};
@@ -247,7 +252,7 @@ class Document {
DocNode Root;
// The KindAndDocument structs pointed to by nodes in the document.
- KindAndDocument KindAndDocs[size_t(Type::Extension) + 1];
+ KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
// Whether YAML output uses hex for UInt.
bool HexMode = false;
@@ -255,7 +260,7 @@ class Document {
public:
Document() {
clear();
- for (unsigned T = 0; T != size_t(Type::Extension) + 1; ++T)
+ for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
KindAndDocs[T] = {this, Type(T)};
}
@@ -263,7 +268,13 @@ class Document {
DocNode &getRoot() { return Root; }
/// Restore the Document to an empty state.
- void clear() { getRoot() = getNode(); }
+ void clear() { getRoot() = getEmptyNode(); }
+
+ /// Create an empty node associated with this Document.
+ DocNode getEmptyNode() {
+ auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
+ return N;
+ }
/// Create a nil node associated with this Document.
DocNode getNode() {
@@ -345,15 +356,35 @@ class Document {
return N.getArray();
}
- /// Read a MsgPack document from a binary MsgPack blob.
- /// The blob data must remain valid for the lifetime of this Document (because
- /// a string object in the document contains a StringRef into the original
- /// blob).
- /// If Multi, then this sets root to an array and adds top-level objects to
- /// it. If !Multi, then it only reads a single top-level object, even if there
- /// are more, and sets root to that.
- /// Returns false if failed due to illegal format.
- bool readFromBlob(StringRef Blob, bool Multi);
+ /// Read a document from a binary msgpack blob, merging into anything already
+ /// in the Document. The blob data must remain valid for the lifetime of this
+ /// Document (because a string object in the document contains a StringRef
+ /// into the original blob). If Multi, then this sets root to an array and
+ /// adds top-level objects to it. If !Multi, then it only reads a single
+ /// top-level object, even if there are more, and sets root to that. Returns
+ /// false if failed due to illegal format or merge error.
+ ///
+ /// The Merger arg is a callback function that is called when the merge has a
+ /// conflict, that is, it is trying to set an item that is already set. If the
+ /// conflict cannot be resolved, the callback function returns -1. If the
+ /// conflict can be resolved, the callback returns a non-negative number and
+ /// sets *DestNode to the resolved node. The returned non-negative number is
+ /// significant only for an array node; it is then the array index to start
+ /// populating at. That allows Merger to choose whether to merge array
+ /// elements (returns 0) or append new elements (returns existing size).
+ ///
+ /// If SrcNode is an array or map, the resolution must be that *DestNode is an
+ /// array or map respectively, although it could be the array or map
+ /// (respectively) that was already there. MapKey is the key if *DestNode is a
+ /// map entry, a nil node otherwise.
+ ///
+ /// The default for Merger is to disallow any conflict.
+ bool readFromBlob(
+ StringRef Blob, bool Multi,
+ function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
+ Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+ return -1;
+ });
/// Write a MsgPack document to a binary MsgPack blob.
void writeToBlob(std::string &Blob);
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
index bd760f7d7c87..a6ae542637fd 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackReader.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -57,6 +57,7 @@ enum class Type : uint8_t {
Array,
Map,
Extension,
+ Empty, // Used by MsgPackDocument to represent an empty node
};
/// Extension types are composed of a user-defined type ID and an uninterpreted
diff --git a/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
index e12c54a37ad0..a7e43a8b1538 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocument.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
@@ -40,46 +40,56 @@ DocNode &MapDocNode::operator[](StringRef S) {
/// Member access for MapDocNode.
DocNode &MapDocNode::operator[](DocNode Key) {
assert(!Key.isEmpty());
- MapTy::value_type Entry(Key, DocNode());
- auto ItAndInserted = Map->insert(Entry);
- if (ItAndInserted.second) {
+ DocNode &N = (*Map)[Key];
+ if (N.isEmpty()) {
// Ensure a new element has its KindAndDoc initialized.
- ItAndInserted.first->second = getDocument()->getNode();
+ N = getDocument()->getEmptyNode();
}
- return ItAndInserted.first->second;
+ return N;
}
/// Array element access. This extends the array if necessary.
DocNode &ArrayDocNode::operator[](size_t Index) {
if (size() <= Index) {
// Ensure new elements have their KindAndDoc initialized.
- Array->resize(Index + 1, getDocument()->getNode());
+ Array->resize(Index + 1, getDocument()->getEmptyNode());
}
return (*Array)[Index];
}
// A level in the document reading stack.
struct StackLevel {
+ StackLevel(DocNode Node, size_t StartIndex, size_t Length,
+ DocNode *MapEntry = nullptr)
+ : Node(Node), Index(StartIndex), End(StartIndex + Length),
+ MapEntry(MapEntry) {}
DocNode Node;
- size_t Length;
+ size_t Index;
+ size_t End;
// Points to map entry when we have just processed a map key.
DocNode *MapEntry;
+ DocNode MapKey;
};
-// Read a document from a binary msgpack blob.
+// Read a document from a binary msgpack blob, merging into anything already in
+// the Document.
// The blob data must remain valid for the lifetime of this Document (because a
// string object in the document contains a StringRef into the original blob).
// If Multi, then this sets root to an array and adds top-level objects to it.
// If !Multi, then it only reads a single top-level object, even if there are
// more, and sets root to that.
-// Returns false if failed due to illegal format.
-bool Document::readFromBlob(StringRef Blob, bool Multi) {
+// Returns false if failed due to illegal format or merge error.
+
+bool Document::readFromBlob(
+ StringRef Blob, bool Multi,
+ function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
+ Merger) {
msgpack::Reader MPReader(Blob);
SmallVector<StackLevel, 4> Stack;
if (Multi) {
// Create the array for multiple top-level objects.
Root = getArrayNode();
- Stack.push_back(StackLevel({Root, (size_t)-1, nullptr}));
+ Stack.push_back(StackLevel(Root, 0, (size_t)-1));
}
do {
// On to next element (or key if doing a map key next).
@@ -124,29 +134,47 @@ bool Document::readFromBlob(StringRef Blob, bool Multi) {
}
// Store it.
+ DocNode *DestNode = nullptr;
if (Stack.empty())
- Root = Node;
+ DestNode = &Root;
else if (Stack.back().Node.getKind() == Type::Array) {
// Reading an array entry.
auto &Array = Stack.back().Node.getArray();
- Array.push_back(Node);
+ DestNode = &Array[Stack.back().Index++];
} else {
auto &Map = Stack.back().Node.getMap();
if (!Stack.back().MapEntry) {
// Reading a map key.
+ Stack.back().MapKey = Node;
Stack.back().MapEntry = &Map[Node];
- } else {
- // Reading the value for the map key read in the last iteration.
- *Stack.back().MapEntry = Node;
- Stack.back().MapEntry = nullptr;
+ continue;
}
+ // Reading the value for the map key read in the last iteration.
+ DestNode = Stack.back().MapEntry;
+ Stack.back().MapEntry = nullptr;
+ ++Stack.back().Index;
}
+ int MergeResult = 0;
+ if (!DestNode->isEmpty()) {
+ // In a merge, there is already a value at this position. Call the
+ // callback to attempt to resolve the conflict. The resolution must result
+ // in an array or map if Node is an array or map respectively.
+ DocNode MapKey = !Stack.empty() && !Stack.back().MapKey.isEmpty()
+ ? Stack.back().MapKey
+ : getNode();
+ MergeResult = Merger(DestNode, Node, MapKey);
+ if (MergeResult < 0)
+ return false; // Merge conflict resolution failed
+ assert(!((Node.isMap() && !DestNode->isMap()) ||
+ (Node.isArray() && !DestNode->isArray())));
+ } else
+ *DestNode = Node;
// See if we're starting a new array or map.
- switch (Node.getKind()) {
+ switch (DestNode->getKind()) {
case msgpack::Type::Array:
case msgpack::Type::Map:
- Stack.push_back(StackLevel({Node, Obj.Length, nullptr}));
+ Stack.push_back(StackLevel(*DestNode, MergeResult, Obj.Length, nullptr));
break;
default:
break;
@@ -154,14 +182,10 @@ bool Document::readFromBlob(StringRef Blob, bool Multi) {
// Pop finished stack levels.
while (!Stack.empty()) {
- if (Stack.back().Node.getKind() == msgpack::Type::Array) {
- if (Stack.back().Node.getArray().size() != Stack.back().Length)
- break;
- } else {
- if (Stack.back().MapEntry ||
- Stack.back().Node.getMap().size() != Stack.back().Length)
- break;
- }
+ if (Stack.back().MapEntry)
+ break;
+ if (Stack.back().Index != Stack.back().End)
+ break;
Stack.pop_back();
}
} while (!Stack.empty());
diff --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
index 00b157963f68..709612009a97 100644
--- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
+++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp
@@ -21,7 +21,7 @@ TEST(MsgPackDocument, TestReadInt) {
ASSERT_EQ(Doc.getRoot().getInt(), 0);
}
-TEST(MsgPackDocument, TestReadArray) {
+TEST(MsgPackDocument, TestReadMergeArray) {
Document Doc;
bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false);
ASSERT_TRUE(Ok);
@@ -33,9 +33,64 @@ TEST(MsgPackDocument, TestReadArray) {
ASSERT_EQ(SI.getInt(), 1);
auto SN = A[1];
ASSERT_EQ(SN.getKind(), Type::Nil);
+
+ Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false,
+ [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+ // Allow array, merging into existing elements, ORing
+ // ints.
+ if (DestNode->getKind() == Type::Int &&
+ SrcNode.getKind() == Type::Int) {
+ *DestNode = DestNode->getDocument()->getNode(
+ DestNode->getInt() | SrcNode.getInt());
+ return 0;
+ }
+ return DestNode->isArray() && SrcNode.isArray() ? 0
+ : -1;
+ });
+ ASSERT_TRUE(Ok);
+ A = Doc.getRoot().getArray();
+ ASSERT_EQ(A.size(), 2u);
+ SI = A[0];
+ ASSERT_EQ(SI.getKind(), Type::Int);
+ ASSERT_EQ(SI.getInt(), 43);
+ SN = A[1];
+ ASSERT_EQ(SN.getKind(), Type::Nil);
+}
+
+TEST(MsgPackDocument, TestReadAppendArray) {
+ Document Doc;
+ bool Ok = Doc.readFromBlob(StringRef("\x92\xd0\x01\xc0"), /*Multi=*/false);
+ ASSERT_TRUE(Ok);
+ ASSERT_EQ(Doc.getRoot().getKind(), Type::Array);
+ auto A = Doc.getRoot().getArray();
+ ASSERT_EQ(A.size(), 2u);
+ auto SI = A[0];
+ ASSERT_EQ(SI.getKind(), Type::Int);
+ ASSERT_EQ(SI.getInt(), 1);
+ auto SN = A[1];
+ ASSERT_EQ(SN.getKind(), Type::Nil);
+
+ Ok = Doc.readFromBlob(StringRef("\x91\xd0\x2a"), /*Multi=*/false,
+ [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+ // Allow array, appending after existing elements
+ return DestNode->isArray() && SrcNode.isArray()
+ ? DestNode->getArray().size()
+ : -1;
+ });
+ ASSERT_TRUE(Ok);
+ A = Doc.getRoot().getArray();
+ ASSERT_EQ(A.size(), 3u);
+ SI = A[0];
+ ASSERT_EQ(SI.getKind(), Type::Int);
+ ASSERT_EQ(SI.getInt(), 1);
+ SN = A[1];
+ ASSERT_EQ(SN.getKind(), Type::Nil);
+ SI = A[2];
+ ASSERT_EQ(SI.getKind(), Type::Int);
+ ASSERT_EQ(SI.getInt(), 42);
}
-TEST(MsgPackDocument, TestReadMap) {
+TEST(MsgPackDocument, TestReadMergeMap) {
Document Doc;
bool Ok = Doc.readFromBlob(StringRef("\x82\xa3"
"foo"
@@ -53,6 +108,68 @@ TEST(MsgPackDocument, TestReadMap) {
auto BarS = M["bar"];
ASSERT_EQ(BarS.getKind(), Type::Int);
ASSERT_EQ(BarS.getInt(), 2);
+
+ Ok = Doc.readFromBlob(StringRef("\x82\xa3"
+ "foz"
+ "\xd0\x03\xa3"
+ "baz"
+ "\xd0\x04"),
+ /*Multi=*/false,
+ [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
+ return DestNode->isMap() && SrcNode.isMap() ? 0 : -1;
+ });
+ ASSERT_TRUE(Ok);
+ ASSERT_EQ(M.size(), 4u);
+ FooS = M["foo"];
+ ASSERT_EQ(FooS.getKind(), Type::Int);
+ ASSERT_EQ(FooS.getInt(), 1);
+ BarS = M["bar"];
+ ASSERT_EQ(BarS.getKind(), Type::Int);
+ ASSERT_EQ(BarS.getInt(), 2);
+ auto FozS = M["foz"];
+ ASSERT_EQ(FozS.getKind(), Type::Int);
+ ASSERT_EQ(FozS.getInt(), 3);
+ auto BazS = M["baz"];
+ ASSERT_EQ(BazS.getKind(), Type::Int);
+ ASSERT_EQ(BazS.getInt(), 4);
+
+ Ok = Doc.readFromBlob(
+ StringRef("\x82\xa3"
+ "foz"
+ "\xd0\x06\xa3"
+ "bay"
+ "\xd0\x08"),
+ /*Multi=*/false, [](DocNode *Dest, DocNode Src, DocNode MapKey) {
+ // Merger function that merges two ints by ORing their values, as long
+ // as the map key is "foz".
+ if (Src.isMap())
+ return Dest->isMap();
+ if (Src.isArray())
+ return Dest->isArray();
+ if (MapKey.isString() && MapKey.getString() == "foz" &&
+ Dest->getKind() == Type::Int && Src.getKind() == Type::Int) {
+ *Dest = Src.getDocument()->getNode(Dest->getInt() | Src.getInt());
+ return true;
+ }
+ return false;
+ });
+ ASSERT_TRUE(Ok);
+ ASSERT_EQ(M.size(), 5u);
+ FooS = M["foo"];
+ ASSERT_EQ(FooS.getKind(), Type::Int);
+ ASSERT_EQ(FooS.getInt(), 1);
+ BarS = M["bar"];
+ ASSERT_EQ(BarS.getKind(), Type::Int);
+ ASSERT_EQ(BarS.getInt(), 2);
+ FozS = M["foz"];
+ ASSERT_EQ(FozS.getKind(), Type::Int);
+ ASSERT_EQ(FozS.getInt(), 7);
+ BazS = M["baz"];
+ ASSERT_EQ(BazS.getKind(), Type::Int);
+ ASSERT_EQ(BazS.getInt(), 4);
+ auto BayS = M["bay"];
+ ASSERT_EQ(BayS.getKind(), Type::Int);
+ ASSERT_EQ(BayS.getInt(), 8);
}
TEST(MsgPackDocument, TestWriteInt) {
More information about the llvm-commits
mailing list