[llvm] [mlir][bytecode] Check that bytecode source buffer is sufficiently aligned. (PR #66380)

Christian Sigg via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 07:23:52 PDT 2023


https://github.com/chsigg created https://github.com/llvm/llvm-project/pull/66380:

Adjust test to make a copy of the source buffer that is sufficiently aligned.

I'm not entirely sure a check is sufficient here, or whether the required buffer alignment should somehow be reported back to the user by the ByteCodeWriter. If the current change is reasonable, I can add a test.

>From e9673bb1cfc39f07e7f71c8b8a5e7c0abc068a85 Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg at google.com>
Date: Thu, 14 Sep 2023 16:16:59 +0200
Subject: [PATCH] Check that bytecode source buffer is sufficiently aligned.

Adjust test to make a copy of the source buffer that is sufficiently aligned.
---
 mlir/lib/Bytecode/Reader/BytecodeReader.cpp   | 33 ++++++++++++++-----
 mlir/unittests/Bytecode/BytecodeTest.cpp      | 18 ++++++++--
 .../mlir/unittests/BUILD.bazel                | 21 ++++++++++++
 3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 483cbfda8d0e565..d6a7ab7f366d3bd 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -11,29 +11,33 @@
 #include "mlir/Bytecode/BytecodeImplementation.h"
 #include "mlir/Bytecode/BytecodeOpInterface.h"
 #include "mlir/Bytecode/Encoding.h"
-#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/Verifier.h"
 #include "mlir/IR/Visitors.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/SourceMgr.h"
+
+#include <cassert>
 #include <cstddef>
+#include <cstdint>
+#include <cstring>
 #include <list>
 #include <memory>
 #include <numeric>
 #include <optional>
+#include <string>
 
 #define DEBUG_TYPE "mlir-bytecode-reader"
 
@@ -93,23 +97,31 @@ namespace {
 class EncodingReader {
 public:
   explicit EncodingReader(ArrayRef<uint8_t> contents, Location fileLoc)
-      : dataIt(contents.data()), dataEnd(contents.end()), fileLoc(fileLoc) {}
+      : buffer(contents), dataIt(buffer.begin()), fileLoc(fileLoc) {}
   explicit EncodingReader(StringRef contents, Location fileLoc)
       : EncodingReader({reinterpret_cast<const uint8_t *>(contents.data()),
                         contents.size()},
                        fileLoc) {}
 
   /// Returns true if the entire section has been read.
-  bool empty() const { return dataIt == dataEnd; }
+  bool empty() const { return dataIt == buffer.end(); }
 
   /// Returns the remaining size of the bytecode.
-  size_t size() const { return dataEnd - dataIt; }
+  size_t size() const { return buffer.end() - dataIt; }
 
   /// Align the current reader position to the specified alignment.
   LogicalResult alignTo(unsigned alignment) {
     if (!llvm::isPowerOf2_32(alignment))
       return emitError("expected alignment to be a power-of-two");
 
+    // Ensure the data buffer was sufficiently aligned in the first place.
+    if (LLVM_UNLIKELY(
+            !llvm::isAddrAligned(llvm::Align(alignment), buffer.begin()))) {
+      return emitError("expected bytecode buffer to be aligned to ", alignment,
+                       ", but got pointer: '0x" +
+                           llvm::utohexstr((uintptr_t)buffer.begin()) + "'");
+    }
+
     // Shift the reader position to the next alignment boundary.
     while (uintptr_t(dataIt) & (uintptr_t(alignment) - 1)) {
       uint8_t padding;
@@ -320,8 +332,11 @@ class EncodingReader {
     return success();
   }
 
-  /// The current data iterator, and an iterator to the end of the buffer.
-  const uint8_t *dataIt, *dataEnd;
+  /// The bytecode buffer.
+  ArrayRef<uint8_t> buffer;
+
+  /// The current iterator within the 'buffer'.
+  const uint8_t *dataIt;
 
   /// A location for the bytecode used to report errors.
   Location fileLoc;
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index fc86f132dd60b4d..26e63a3bf9ed34b 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Bytecode/BytecodeReader.h"
 #include "mlir/Bytecode/BytecodeWriter.h"
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -19,6 +18,10 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+
 using namespace llvm;
 using namespace mlir;
 
@@ -50,9 +53,18 @@ TEST(Bytecode, MultiModuleWithResource) {
   llvm::raw_string_ostream ostream(buffer);
   ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), ostream)));
 
+  // Make a sufficiently aligned copy of the buffer for reading it back.
+  ostream.flush();
+  constexpr std::size_t kAlignment = 16; // AsmResourceBlob alignment.
+  auto deleter = [](char *ptr) { std::free(ptr); };
+  std::unique_ptr<char, decltype(deleter)> aligned_buffer(
+      static_cast<char *>(std::aligned_alloc(kAlignment, buffer.size())),
+      deleter);
+  std::copy(buffer.begin(), buffer.end(), aligned_buffer.get());
+
   // Parse it back
-  OwningOpRef<Operation *> roundTripModule =
-      parseSourceString<Operation *>(ostream.str(), parseConfig);
+  OwningOpRef<Operation *> roundTripModule = parseSourceString<Operation *>(
+      {aligned_buffer.get(), buffer.size()}, parseConfig);
   ASSERT_TRUE(roundTripModule);
 
   // FIXME: Parsing external resources does not work on big-endian
diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
index 13f9822a61d1489..ca6c294171af6dd 100644
--- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
@@ -358,6 +358,27 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "bytecode_tests",
+    size = "small",
+    srcs = glob([
+        "Bytecode/*.cpp",
+        "Bytecode/*.h",
+        "Bytecode/*/*.cpp",
+        "Bytecode/*/*.h",
+    ]),
+    deps = [
+        "//llvm:Support",
+        "//mlir:BytecodeReader",
+        "//mlir:BytecodeWriter",
+        "//mlir:IR",
+        "//mlir:Parser",
+        "//third-party/unittest:gmock",
+        "//third-party/unittest:gtest",
+        "//third-party/unittest:gtest_main",
+    ],
+)
+
 cc_test(
     name = "conversion_tests",
     size = "small",



More information about the llvm-commits mailing list