[Mlir-commits] [llvm] [mlir] [mlir] API to serialize bytecode to mmap'd buffer (PR #126953)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Wed Feb 12 10:38:05 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir

Author: Nikhil Kalra (nikalra)

<details>
<summary>Changes</summary>

For clients wanting to serialize bytecode to an in-memory buffer, there is currently no way to query `BytecodeWriter` for the required buffer size before the payload is written to the given stream. As a result, users of `BytecodeWriter` who need to serialize to an in-memory buffer must use `raw_svector_ostream` or equivalent, which results in repeated memory allocations and copies as the buffer is exhausted.

To solve this, we'll provide a new API for writing bytecode to a memory-mapped buffer that is appropriately sized for the given `Operation` being encoded. We do this by splitting bytecode encoding and writing into two separate routines so that it's possible to allocate a buffer for the encoded size prior to writing.

Future iterations of this routine may want to optimize encoding such that sections are also written to memory-mapped buffers so that the entire module isn't duplicated in memory prior to being written to the stream.

---
Full diff: https://github.com/llvm/llvm-project/pull/126953.diff


5 Files Affected:

- (modified) llvm/include/llvm/Support/raw_ostream.h (+13) 
- (modified) llvm/lib/Support/raw_ostream.cpp (+13) 
- (modified) mlir/include/mlir/Bytecode/BytecodeWriter.h (+6) 
- (modified) mlir/lib/Bytecode/Writer/BytecodeWriter.cpp (+63-8) 
- (modified) mlir/unittests/Bytecode/BytecodeTest.cpp (+22) 


``````````diff
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index d3b411590e7fd..90c0c013e38c8 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_SUPPORT_RAW_OSTREAM_H
 #define LLVM_SUPPORT_RAW_OSTREAM_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
@@ -769,6 +770,18 @@ class buffer_unique_ostream : public raw_svector_ostream {
   ~buffer_unique_ostream() override { *OS << str(); }
 };
 
+/// An output stream that writes into a caller-provided, fixed-size buffer.
+class fixed_buffer_ostream : public raw_ostream {
+  MutableArrayRef<std::byte> Buffer; // Destination bytes (a view, not owned).
+  size_t Pos = 0;                    // Number of bytes written so far.
+
+  void write_impl(const char *Ptr, size_t Size) final;
+  uint64_t current_pos() const final { return Pos; }
+
+public:
+  explicit fixed_buffer_ostream(MutableArrayRef<std::byte> Buffer);
+};
+
 // Helper struct to add indentation to raw_ostream. Instead of
 // OS.indent(6) << "more stuff";
 // you can use
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index e75ddc66b7d16..875c14782dd2e 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -1009,6 +1009,19 @@ void buffer_ostream::anchor() {}
 
 void buffer_unique_ostream::anchor() {}
 
+// Appends `Size` bytes from `Ptr` at `Pos`; overrunning the buffer is fatal.
+void fixed_buffer_ostream::write_impl(const char *Ptr, size_t Size) {
+  // Test the space left: `Pos + Size` could wrap around for a huge `Size`.
+  if (Size > Buffer.size() - Pos)
+    report_fatal_error(
+        "Attempted to write past the end of the fixed size buffer.");
+  memcpy(Buffer.data() + Pos, Ptr, Size);
+  Pos += Size;
+}
+
+fixed_buffer_ostream::fixed_buffer_ostream(MutableArrayRef<std::byte> Buffer)
+    : raw_ostream(/*unbuffered=*/true), Buffer(Buffer) {}
+
 Error llvm::writeToOutput(StringRef OutputFileName,
                           std::function<Error(raw_ostream &)> Write) {
   if (OutputFileName == "-")
diff --git a/mlir/include/mlir/Bytecode/BytecodeWriter.h b/mlir/include/mlir/Bytecode/BytecodeWriter.h
index c6cff0bc81314..4945adc3e9304 100644
--- a/mlir/include/mlir/Bytecode/BytecodeWriter.h
+++ b/mlir/include/mlir/Bytecode/BytecodeWriter.h
@@ -192,6 +192,12 @@ class BytecodeWriterConfig {
 LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os,
                                   const BytecodeWriterConfig &config = {});
 
+/// Writes the bytecode for the given operation to a read-only memory-mapped
+/// buffer. Returns nullptr if encoding fails (e.g. the desired bytecode
+/// version can't be honored) or if the buffer can't be mapped or protected.
+std::shared_ptr<ArrayRef<std::byte>>
+writeBytecode(Operation *op, const BytecodeWriterConfig &config = {});
+
 } // namespace mlir
 
 #endif // MLIR_BYTECODE_BYTECODEWRITER_H
diff --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
index 2b4697434717d..c2a33e897ec07 100644
--- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
+++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
@@ -20,8 +20,11 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cstddef>
 #include <optional>
+#include <system_error>
 
 #define DEBUG_TYPE "mlir-bytecode-writer"
 
@@ -652,7 +655,7 @@ class BytecodeWriter {
         propertiesSection(numberingState, stringSection, config.getImpl()) {}
 
   /// Write the bytecode for the given root operation.
-  LogicalResult write(Operation *rootOp, raw_ostream &os);
+  LogicalResult writeInto(Operation *rootOp, EncodingEmitter &emitter);
 
 private:
   //===--------------------------------------------------------------------===//
@@ -718,9 +721,8 @@ class BytecodeWriter {
 };
 } // namespace
 
-LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
-  EncodingEmitter emitter;
-
+LogicalResult BytecodeWriter::writeInto(Operation *rootOp,
+                                        EncodingEmitter &emitter) {
   // Emit the bytecode file header. This is how we identify the output as a
   // bytecode file.
   emitter.emitString("ML\xefR", "bytecode header");
@@ -761,9 +763,6 @@ LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
     return rootOp->emitError(
         "unexpected properties emitted incompatible with bytecode <5");
 
-  // Write the generated bytecode to the provided output stream.
-  emitter.writeTo(os);
-
   return success();
 }
 
@@ -1348,5 +1347,61 @@ void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
 LogicalResult mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
                                         const BytecodeWriterConfig &config) {
   BytecodeWriter writer(op, config);
-  return writer.write(op, os);
+  EncodingEmitter emitter;
+
+  // Only touch `os` once the whole payload has been encoded successfully.
+  if (failed(writer.writeInto(op, emitter)))
+    return failure();
+
+  emitter.writeTo(os);
+  return success();
+}
+
+namespace {
+/// Pairs a mapped memory region with a byte view over its payload.
+struct MemoryMappedBlock {
+  /// Maps a writable region for `numBytes` bytes; nullptr if mapping fails.
+  static std::shared_ptr<MemoryMappedBlock>
+  createMemoryMappedBlock(size_t numBytes) {
+    std::error_code ec;
+    llvm::sys::OwningMemoryBlock mapped{llvm::sys::Memory::allocateMappedMemory(
+        numBytes, nullptr, llvm::sys::Memory::MF_WRITE, ec)};
+    if (ec)
+      return nullptr;
+
+    auto block = std::make_shared<MemoryMappedBlock>();
+    block->mmapBlock = std::move(mapped);
+    block->writableView = MutableArrayRef<std::byte>(
+        static_cast<std::byte *>(block->mmapBlock.base()), numBytes);
+    return block;
+  }
+
+  llvm::sys::OwningMemoryBlock mmapBlock;
+  MutableArrayRef<std::byte> writableView;
+};
+} // namespace
+
+std::shared_ptr<ArrayRef<std::byte>>
+mlir::writeBytecode(Operation *op, const BytecodeWriterConfig &config) {
+  // Encode first so the exact payload size is known before mapping memory.
+  BytecodeWriter writer(op, config);
+  EncodingEmitter emitter;
+  if (failed(writer.writeInto(op, emitter)))
+    return nullptr;
+
+  auto mapped = MemoryMappedBlock::createMemoryMappedBlock(emitter.size());
+  if (!mapped)
+    return nullptr;
+
+  // Copy the payload into the mapping through a bounds-checked stream.
+  llvm::fixed_buffer_ostream out(mapped->writableView);
+  emitter.writeTo(out);
+
+  // Drop write access before the bytes are handed to the caller.
+  std::error_code protectEc = llvm::sys::Memory::protectMappedMemory(
+      mapped->mmapBlock.getMemoryBlock(), llvm::sys::Memory::MF_READ);
+  if (protectEc)
+    return nullptr;
+
+  return std::shared_ptr<ArrayRef<std::byte>>(mapped, &mapped->writableView);
+}
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index cb915a092a0be..a3c069fbcab58 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -16,9 +16,11 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/LogicalResult.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <cstring>
 
 using namespace llvm;
 using namespace mlir;
@@ -88,6 +90,26 @@ TEST(Bytecode, MultiModuleWithResource) {
   checkResourceAttribute(*roundTripModule);
 }
 
+// Checks that writeBytecode() and writeBytecodeToFile() emit identical bytes.
+TEST(Bytecode, WriteEquivalence) {
+  MLIRContext context;
+  ParserConfig parseConfig(&context);
+  OwningOpRef<Operation *> module =
+      parseSourceString<Operation *>(irWithResources, parseConfig);
+  ASSERT_TRUE(module);
+
+  // Reference encoding: stream the bytecode into a growable string.
+  std::string streamed;
+  llvm::raw_string_ostream os(streamed);
+  ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), os)));
+
+  // The pre-sized mmap'd buffer must hold exactly the same bytes.
+  auto mapped = writeBytecode(module.get());
+  ASSERT_TRUE(mapped);
+  ASSERT_EQ(mapped->size(), streamed.size());
+  ASSERT_EQ(memcmp(streamed.data(), mapped->data(), mapped->size()), 0);
+}
+
 namespace {
 /// A custom operation for the purpose of showcasing how discardable attributes
 /// are handled in absence of properties.

``````````

</details>


https://github.com/llvm/llvm-project/pull/126953


More information about the Mlir-commits mailing list