[llvm] [mlir][bytecode] Check that bytecode source buffer is sufficiently aligned. (PR #66380)
Christian Sigg via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 07:23:52 PDT 2023
https://github.com/chsigg created https://github.com/llvm/llvm-project/pull/66380:
Adjust test to make a copy of the source buffer that is sufficiently aligned.
I'm not entirely sure a check is sufficient here, or whether the required buffer alignment should somehow be reported back to the user by the ByteCodeWriter. If the current change is reasonable, I can add a test.
>From e9673bb1cfc39f07e7f71c8b8a5e7c0abc068a85 Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg at google.com>
Date: Thu, 14 Sep 2023 16:16:59 +0200
Subject: [PATCH] Check that bytecode source buffer is sufficiently aligned.
Adjust test to make a copy of the source buffer that is sufficiently aligned.
---
mlir/lib/Bytecode/Reader/BytecodeReader.cpp | 33 ++++++++++++++-----
mlir/unittests/Bytecode/BytecodeTest.cpp | 18 ++++++++--
.../mlir/unittests/BUILD.bazel | 21 ++++++++++++
3 files changed, 60 insertions(+), 12 deletions(-)
diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 483cbfda8d0e565..d6a7ab7f366d3bd 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -11,29 +11,33 @@
#include "mlir/Bytecode/BytecodeImplementation.h"
#include "mlir/Bytecode/BytecodeOpInterface.h"
#include "mlir/Bytecode/Encoding.h"
-#include "mlir/IR/BuiltinDialect.h"
+#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/Verifier.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/SourceMgr.h"
+
+#include <cassert>
#include <cstddef>
+#include <cstdint>
+#include <cstring>
#include <list>
#include <memory>
#include <numeric>
#include <optional>
+#include <string>
#define DEBUG_TYPE "mlir-bytecode-reader"
@@ -93,23 +97,31 @@ namespace {
class EncodingReader {
public:
explicit EncodingReader(ArrayRef<uint8_t> contents, Location fileLoc)
- : dataIt(contents.data()), dataEnd(contents.end()), fileLoc(fileLoc) {}
+ : buffer(contents), dataIt(buffer.begin()), fileLoc(fileLoc) {}
explicit EncodingReader(StringRef contents, Location fileLoc)
: EncodingReader({reinterpret_cast<const uint8_t *>(contents.data()),
contents.size()},
fileLoc) {}
/// Returns true if the entire section has been read.
- bool empty() const { return dataIt == dataEnd; }
+ bool empty() const { return dataIt == buffer.end(); }
/// Returns the remaining size of the bytecode.
- size_t size() const { return dataEnd - dataIt; }
+ size_t size() const { return buffer.end() - dataIt; }
/// Align the current reader position to the specified alignment.
LogicalResult alignTo(unsigned alignment) {
if (!llvm::isPowerOf2_32(alignment))
return emitError("expected alignment to be a power-of-two");
+ // Ensure the data buffer was sufficiently aligned in the first place.
+ if (LLVM_UNLIKELY(
+ !llvm::isAddrAligned(llvm::Align(alignment), buffer.begin()))) {
+ return emitError("expected bytecode buffer to be aligned to ", alignment,
+ ", but got pointer: '0x" +
+ llvm::utohexstr((uintptr_t)buffer.begin()) + "'");
+ }
+
// Shift the reader position to the next alignment boundary.
while (uintptr_t(dataIt) & (uintptr_t(alignment) - 1)) {
uint8_t padding;
@@ -320,8 +332,11 @@ class EncodingReader {
return success();
}
- /// The current data iterator, and an iterator to the end of the buffer.
- const uint8_t *dataIt, *dataEnd;
+ /// The bytecode buffer.
+ ArrayRef<uint8_t> buffer;
+
+ /// The current iterator within the 'buffer'.
+ const uint8_t *dataIt;
/// A location for the bytecode used to report errors.
Location fileLoc;
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index fc86f132dd60b4d..26e63a3bf9ed34b 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Bytecode/BytecodeReader.h"
#include "mlir/Bytecode/BytecodeWriter.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinAttributes.h"
@@ -19,6 +18,10 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <algorithm>
+#include <cstdlib>
+#include <memory>
+
using namespace llvm;
using namespace mlir;
@@ -50,9 +53,18 @@ TEST(Bytecode, MultiModuleWithResource) {
llvm::raw_string_ostream ostream(buffer);
ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), ostream)));
+ // Make a sufficiently aligned copy of the buffer for reading it back.
+ ostream.flush();
+ constexpr std::size_t kAlignment = 16; // AsmResourceBlob alignment.
+ auto deleter = [](char *ptr) { std::free(ptr); };
+ std::unique_ptr<char, decltype(deleter)> aligned_buffer(
+ static_cast<char *>(std::aligned_alloc(kAlignment, buffer.size())),
+ deleter);
+ std::copy(buffer.begin(), buffer.end(), aligned_buffer.get());
+
// Parse it back
- OwningOpRef<Operation *> roundTripModule =
- parseSourceString<Operation *>(ostream.str(), parseConfig);
+ OwningOpRef<Operation *> roundTripModule = parseSourceString<Operation *>(
+ {aligned_buffer.get(), buffer.size()}, parseConfig);
ASSERT_TRUE(roundTripModule);
// FIXME: Parsing external resources does not work on big-endian
diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
index 13f9822a61d1489..ca6c294171af6dd 100644
--- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
@@ -358,6 +358,27 @@ cc_test(
],
)
+cc_test(
+ name = "bytecode_tests",
+ size = "small",
+ srcs = glob([
+ "Bytecode/*.cpp",
+ "Bytecode/*.h",
+ "Bytecode/*/*.cpp",
+ "Bytecode/*/*.h",
+ ]),
+ deps = [
+ "//llvm:Support",
+ "//mlir:BytecodeReader",
+ "//mlir:BytecodeWriter",
+ "//mlir:IR",
+ "//mlir:Parser",
+ "//third-party/unittest:gmock",
+ "//third-party/unittest:gtest",
+ "//third-party/unittest:gtest_main",
+ ],
+)
+
cc_test(
name = "conversion_tests",
size = "small",
More information about the llvm-commits
mailing list