[llvm] [mlir][bytecode] Check that bytecode source buffer is sufficiently aligned. (PR #66380)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 07:24:57 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir
            
<details>
<summary>Changes</summary>
Adjust test to make a copy of the source buffer that is sufficiently aligned.

I'm not entirely sure a check is sufficient here, or whether the required buffer alignment should somehow be reported back to the user by the ByteCodeWriter. If the current change is reasonable, I can add a test.
--
Full diff: https://github.com/llvm/llvm-project/pull/66380.diff

3 Files Affected:

- (modified) mlir/lib/Bytecode/Reader/BytecodeReader.cpp (+24-9) 
- (modified) mlir/unittests/Bytecode/BytecodeTest.cpp (+15-3) 
- (modified) utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel (+21) 


<pre>
diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 483cbfda8d0e565..d6a7ab7f366d3bd 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -11,29 +11,33 @@
 #include &quot;mlir/Bytecode/BytecodeImplementation.h&quot;
 #include &quot;mlir/Bytecode/BytecodeOpInterface.h&quot;
 #include &quot;mlir/Bytecode/Encoding.h&quot;
-#include &quot;mlir/IR/BuiltinDialect.h&quot;
+#include &quot;mlir/IR/AsmState.h&quot;
 #include &quot;mlir/IR/BuiltinOps.h&quot;
 #include &quot;mlir/IR/Diagnostics.h&quot;
+#include &quot;mlir/IR/Dialect.h&quot;
 #include &quot;mlir/IR/OpImplementation.h&quot;
 #include &quot;mlir/IR/Verifier.h&quot;
 #include &quot;mlir/IR/Visitors.h&quot;
 #include &quot;mlir/Support/LLVM.h&quot;
 #include &quot;mlir/Support/LogicalResult.h&quot;
 #include &quot;llvm/ADT/ArrayRef.h&quot;
-#include &quot;llvm/ADT/MapVector.h&quot;
 #include &quot;llvm/ADT/ScopeExit.h&quot;
-#include &quot;llvm/ADT/SmallString.h&quot;
 #include &quot;llvm/ADT/StringExtras.h&quot;
 #include &quot;llvm/ADT/StringRef.h&quot;
 #include &quot;llvm/Support/Endian.h&quot;
+#include &quot;llvm/Support/ErrorHandling.h&quot;
 #include &quot;llvm/Support/MemoryBufferRef.h&quot;
-#include &quot;llvm/Support/SaveAndRestore.h&quot;
 #include &quot;llvm/Support/SourceMgr.h&quot;
+
+#include &lt;cassert&gt;
 #include &lt;cstddef&gt;
+#include &lt;cstdint&gt;
+#include &lt;cstring&gt;
 #include &lt;list&gt;
 #include &lt;memory&gt;
 #include &lt;numeric&gt;
 #include &lt;optional&gt;
+#include &lt;string&gt;
 
 #define DEBUG_TYPE &quot;mlir-bytecode-reader&quot;
 
@@ -93,23 +97,31 @@ namespace {
 class EncodingReader {
 public:
   explicit EncodingReader(ArrayRef&lt;uint8_t&gt; contents, Location fileLoc)
-      : dataIt(contents.data()), dataEnd(contents.end()), fileLoc(fileLoc) {}
+      : buffer(contents), dataIt(buffer.begin()), fileLoc(fileLoc) {}
   explicit EncodingReader(StringRef contents, Location fileLoc)
       : EncodingReader({reinterpret_cast&lt;const uint8_t *&gt;(contents.data()),
                         contents.size()},
                        fileLoc) {}
 
   /// Returns true if the entire section has been read.
-  bool empty() const { return dataIt == dataEnd; }
+  bool empty() const { return dataIt == buffer.end(); }
 
   /// Returns the remaining size of the bytecode.
-  size_t size() const { return dataEnd - dataIt; }
+  size_t size() const { return buffer.end() - dataIt; }
 
   /// Align the current reader position to the specified alignment.
   LogicalResult alignTo(unsigned alignment) {
     if (!llvm::isPowerOf2_32(alignment))
       return emitError(&quot;expected alignment to be a power-of-two&quot;);
 
+    // Ensure the data buffer was sufficiently aligned in the first place.
+    if (LLVM_UNLIKELY(
+            !llvm::isAddrAligned(llvm::Align(alignment), buffer.begin()))) {
+      return emitError(&quot;expected bytecode buffer to be aligned to &quot;, alignment,
+                       &quot;, but got pointer: &#x27;0x&quot; +
+                           llvm::utohexstr((uintptr_t)buffer.begin()) + &quot;&#x27;&quot;);
+    }
+
     // Shift the reader position to the next alignment boundary.
     while (uintptr_t(dataIt) &amp; (uintptr_t(alignment) - 1)) {
       uint8_t padding;
@@ -320,8 +332,11 @@ class EncodingReader {
     return success();
   }
 
-  /// The current data iterator, and an iterator to the end of the buffer.
-  const uint8_t *dataIt, *dataEnd;
+  /// The bytecode buffer.
+  ArrayRef&lt;uint8_t&gt; buffer;
+
+  /// The current iterator within the &#x27;buffer&#x27;.
+  const uint8_t *dataIt;
 
   /// A location for the bytecode used to report errors.
   Location fileLoc;
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index fc86f132dd60b4d..26e63a3bf9ed34b 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include &quot;mlir/Bytecode/BytecodeReader.h&quot;
 #include &quot;mlir/Bytecode/BytecodeWriter.h&quot;
 #include &quot;mlir/IR/AsmState.h&quot;
 #include &quot;mlir/IR/BuiltinAttributes.h&quot;
@@ -19,6 +18,10 @@
 #include &quot;gmock/gmock.h&quot;
 #include &quot;gtest/gtest.h&quot;
 
+#include &lt;algorithm&gt;
+#include &lt;cstdlib&gt;
+#include &lt;memory&gt;
+
 using namespace llvm;
 using namespace mlir;
 
@@ -50,9 +53,18 @@ TEST(Bytecode, MultiModuleWithResource) {
   llvm::raw_string_ostream ostream(buffer);
   ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), ostream)));
 
+  // Make a sufficiently aligned copy of the buffer for reading it back.
+  ostream.flush();
+  constexpr std::size_t kAlignment = 16; // AsmResourceBlob alignment.
+  auto deleter = [](char *ptr) { std::free(ptr); };
+  std::unique_ptr&lt;char, decltype(deleter)&gt; aligned_buffer(
+      static_cast&lt;char *&gt;(std::aligned_alloc(kAlignment, buffer.size())),
+      deleter);
+  std::copy(buffer.begin(), buffer.end(), aligned_buffer.get());
+
   // Parse it back
-  OwningOpRef&lt;Operation *&gt; roundTripModule =
-      parseSourceString&lt;Operation *&gt;(ostream.str(), parseConfig);
+  OwningOpRef&lt;Operation *&gt; roundTripModule = parseSourceString&lt;Operation *&gt;(
+      {aligned_buffer.get(), buffer.size()}, parseConfig);
   ASSERT_TRUE(roundTripModule);
 
   // FIXME: Parsing external resources does not work on big-endian
diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
index 13f9822a61d1489..ca6c294171af6dd 100644
--- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
@@ -358,6 +358,27 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = &quot;bytecode_tests&quot;,
+    size = &quot;small&quot;,
+    srcs = glob([
+        &quot;Bytecode/*.cpp&quot;,
+        &quot;Bytecode/*.h&quot;,
+        &quot;Bytecode/*/*.cpp&quot;,
+        &quot;Bytecode/*/*.h&quot;,
+    ]),
+    deps = [
+        &quot;//llvm:Support&quot;,
+        &quot;//mlir:BytecodeReader&quot;,
+        &quot;//mlir:BytecodeWriter&quot;,
+        &quot;//mlir:IR&quot;,
+        &quot;//mlir:Parser&quot;,
+        &quot;//third-party/unittest:gmock&quot;,
+        &quot;//third-party/unittest:gtest&quot;,
+        &quot;//third-party/unittest:gtest_main&quot;,
+    ],
+)
+
 cc_test(
     name = &quot;conversion_tests&quot;,
     size = &quot;small&quot;,
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66380


More information about the llvm-commits mailing list