[Mlir-commits] [mlir] bb0bbed - Fix bytecode reader/writer on big-endian platforms

Ulrich Weigand llvmlistbot at llvm.org
Fri Jun 23 00:23:12 PDT 2023


Author: Ulrich Weigand
Date: 2023-06-23T09:22:55+02:00
New Revision: bb0bbed610d86ba155f9c066c23038f7f34e2dbb

URL: https://github.com/llvm/llvm-project/commit/bb0bbed610d86ba155f9c066c23038f7f34e2dbb
DIFF: https://github.com/llvm/llvm-project/commit/bb0bbed610d86ba155f9c066c23038f7f34e2dbb.diff

LOG: Fix bytecode reader/writer on big-endian platforms

This makes the bytecode reader/writer work on big-endian platforms.
The only problem was related to encoding of multi-byte integers,
where both reader and writer code make implicit assumptions about
endianness of the host platform.

This fixes the current test failures on s390x, and in addition allows
to remove the UNSUPPORTED markers from all other bytecode-related
test cases - they now also all pass on s390x.

Also adding a GFAIL_SKIP to the MultiModuleWithResource unit test,
as this still fails due to an unrelated endian bug regarding
decoding of external resources.

Differential Revision: https://reviews.llvm.org/D153567

Reviewed By: mehdi_amini, jpienaar, rriddle

Added: 
    

Modified: 
    mlir/lib/Bytecode/Reader/BytecodeReader.cpp
    mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
    mlir/test/Bytecode/general.mlir
    mlir/test/Bytecode/invalid/invalid-dialect_section.mlir
    mlir/test/Bytecode/invalid/invalid-ir_section.mlir
    mlir/test/Bytecode/invalid/invalid-string_section.mlir
    mlir/test/Bytecode/invalid/invalid-structure.mlir
    mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir
    mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir
    mlir/test/Bytecode/resources.mlir
    mlir/test/Bytecode/versioning/versioned_attr.mlir
    mlir/test/Bytecode/versioning/versioned_bytecode.mlir
    mlir/test/Bytecode/versioning/versioned_op.mlir
    mlir/test/Dialect/Builtin/Bytecode/attrs.mlir
    mlir/test/Dialect/Builtin/Bytecode/types.mlir
    mlir/test/Dialect/Quant/Bytecode/types.mlir
    mlir/unittests/Bytecode/BytecodeTest.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 177372c68046b..5aa24ba873ec2 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: Support for big-endian architectures.
-
 #include "mlir/Bytecode/BytecodeReader.h"
 #include "mlir/AsmParser/AsmParser.h"
 #include "mlir/Bytecode/BytecodeImplementation.h"
@@ -27,6 +25,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/SourceMgr.h"
@@ -203,8 +202,14 @@ class EncodingReader {
     // Handle the overwhelming uncommon case where the value required all 8
     // bytes (i.e. a really really big number). In this case, the marker byte is
     // all zeros: `00000000`.
-    if (LLVM_UNLIKELY(result == 0))
-      return parseBytes(sizeof(result), reinterpret_cast<uint8_t *>(&result));
+    if (LLVM_UNLIKELY(result == 0)) {
+      llvm::support::ulittle64_t resultLE;
+      if (failed(parseBytes(sizeof(resultLE),
+                            reinterpret_cast<uint8_t *>(&resultLE))))
+        return failure();
+      result = resultLE;
+      return success();
+    }
     return parseMultiByteVarInt(result);
   }
 
@@ -305,12 +310,13 @@ class EncodingReader {
            "unexpected number of trailing zeros in varint encoding");
 
     // Parse in the remaining bytes of the value.
-    if (failed(parseBytes(numBytes, reinterpret_cast<uint8_t *>(&result) + 1)))
+    llvm::support::ulittle64_t resultLE(result);
+    if (failed(parseBytes(numBytes, reinterpret_cast<uint8_t *>(&resultLE) + 1)))
       return failure();
 
     // Shift out the low-order bits that were used to mark how the value was
     // encoded.
-    result >>= (numBytes + 1);
+    result = resultLE >> (numBytes + 1);
     return success();
   }
 

diff  --git a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
index 03c7a53a34e58..936117aa2b8fc 100644
--- a/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
+++ b/mlir/lib/Bytecode/Writer/BytecodeWriter.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Endian.h"
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
@@ -538,7 +539,8 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
     if (LLVM_LIKELY(it >>= 7) == 0) {
       uint64_t encodedValue = (value << 1) | 0x1;
       encodedValue <<= (numBytes - 1);
-      emitBytes({reinterpret_cast<uint8_t *>(&encodedValue), numBytes});
+      llvm::support::ulittle64_t encodedValueLE(encodedValue);
+      emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes});
       return;
     }
   }
@@ -546,7 +548,8 @@ void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
   // If the value is too large to encode in a single byte, emit a special all
   // zero marker byte and splat the value directly.
   emitByte(0);
-  emitBytes({reinterpret_cast<uint8_t *>(&value), sizeof(value)});
+  llvm::support::ulittle64_t valueLE(value);
+  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)});
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/general.mlir b/mlir/test/Bytecode/general.mlir
index 180fb930737a0..228072a0e0161 100644
--- a/mlir/test/Bytecode/general.mlir
+++ b/mlir/test/Bytecode/general.mlir
@@ -1,8 +1,5 @@
 // RUN: mlir-opt -allow-unregistered-dialect -emit-bytecode %s | mlir-opt -allow-unregistered-dialect | FileCheck %s
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 // CHECK-LABEL: "bytecode.test1"
 // CHECK-NEXT:    "unregistered.op"() {test_attr = #test.dynamic_singleton} : () -> ()
 // CHECK-NEXT:    "bytecode.empty"() : () -> ()

diff  --git a/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir b/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir
index 31558bd1a5432..0508c0966710e 100644
--- a/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir
+++ b/mlir/test/Bytecode/invalid/invalid-dialect_section.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // the dialect section.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Dialect Name
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/invalid/invalid-ir_section.mlir b/mlir/test/Bytecode/invalid/invalid-ir_section.mlir
index e8ea9c3a3ae29..be87e81bdd5f3 100644
--- a/mlir/test/Bytecode/invalid/invalid-ir_section.mlir
+++ b/mlir/test/Bytecode/invalid/invalid-ir_section.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // the IR section.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Operations
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/invalid/invalid-string_section.mlir b/mlir/test/Bytecode/invalid/invalid-string_section.mlir
index 64ec9fc93a49f..a39017c3375e5 100644
--- a/mlir/test/Bytecode/invalid/invalid-string_section.mlir
+++ b/mlir/test/Bytecode/invalid/invalid-string_section.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // the string section.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Count
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/invalid/invalid-structure.mlir b/mlir/test/Bytecode/invalid/invalid-structure.mlir
index 1bd4ac4eabbbb..962dff33085da 100644
--- a/mlir/test/Bytecode/invalid/invalid-structure.mlir
+++ b/mlir/test/Bytecode/invalid/invalid-structure.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // a bytecode file.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Version
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir b/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir
index 376a26098cc5a..9a8bc08e6c25b 100644
--- a/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir
+++ b/mlir/test/Bytecode/invalid/invalid_attr_type_offset_section.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // the attribute/type offset section.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Offset
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir b/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir
index 359cd0b1d3950..aba6b3fd1a34a 100644
--- a/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir
+++ b/mlir/test/Bytecode/invalid/invalid_attr_type_section.mlir
@@ -1,9 +1,6 @@
 // This file contains various failure test cases related to the structure of
 // the attribute/type offset section.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Index
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/resources.mlir b/mlir/test/Bytecode/resources.mlir
index 187101acac479..33ed01d20fa0c 100644
--- a/mlir/test/Bytecode/resources.mlir
+++ b/mlir/test/Bytecode/resources.mlir
@@ -1,8 +1,5 @@
 // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 // CHECK-LABEL: @TestDialectResources
 module @TestDialectResources attributes {
   // CHECK: bytecode.test = dense_resource<decl_resource> : tensor<2xui32>

diff  --git a/mlir/test/Bytecode/versioning/versioned_attr.mlir b/mlir/test/Bytecode/versioning/versioned_attr.mlir
index 0fd6c3c2dfe9e..8ee1a1b4bf376 100644
--- a/mlir/test/Bytecode/versioning/versioned_attr.mlir
+++ b/mlir/test/Bytecode/versioning/versioned_attr.mlir
@@ -1,9 +1,6 @@
 // This file contains a test case representative of a dialect parsing an
 // attribute with versioned custom encoding.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Test attribute upgrade
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/versioning/versioned_bytecode.mlir b/mlir/test/Bytecode/versioning/versioned_bytecode.mlir
index 6fcc3832eec28..30d2a323109b2 100644
--- a/mlir/test/Bytecode/versioning/versioned_bytecode.mlir
+++ b/mlir/test/Bytecode/versioning/versioned_bytecode.mlir
@@ -1,8 +1,5 @@
 // This file contains test cases related to roundtripping.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Test roundtrip
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Bytecode/versioning/versioned_op.mlir b/mlir/test/Bytecode/versioning/versioned_op.mlir
index 5fa170bc24904..0a6ad42e4e821 100644
--- a/mlir/test/Bytecode/versioning/versioned_op.mlir
+++ b/mlir/test/Bytecode/versioning/versioned_op.mlir
@@ -1,9 +1,6 @@
 // This file contains test cases related to the dialect post-parsing upgrade
 // mechanism.
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===--------------------------------------------------------------------===//
 // Test generic
 //===--------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir b/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir
index 2702a159be96a..208d331ca0ad3 100644
--- a/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir
+++ b/mlir/test/Dialect/Builtin/Bytecode/attrs.mlir
@@ -1,8 +1,5 @@
 // RUN: mlir-opt -emit-bytecode -allow-unregistered-dialect %s | mlir-opt -allow-unregistered-dialect -mlir-print-local-scope | FileCheck %s
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===----------------------------------------------------------------------===//
 // ArrayAttr
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/Builtin/Bytecode/types.mlir b/mlir/test/Dialect/Builtin/Bytecode/types.mlir
index f6d0fa79d1254..bcfbf64c833dd 100644
--- a/mlir/test/Dialect/Builtin/Bytecode/types.mlir
+++ b/mlir/test/Dialect/Builtin/Bytecode/types.mlir
@@ -1,8 +1,5 @@
 // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===----------------------------------------------------------------------===//
 // ComplexType
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/test/Dialect/Quant/Bytecode/types.mlir b/mlir/test/Dialect/Quant/Bytecode/types.mlir
index 669f8ed233f5b..359a58557087e 100644
--- a/mlir/test/Dialect/Quant/Bytecode/types.mlir
+++ b/mlir/test/Dialect/Quant/Bytecode/types.mlir
@@ -1,8 +1,5 @@
 // RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
 
-// Bytecode currently does not support big-endian platforms
-// UNSUPPORTED: target=s390x-{{.*}}
-
 //===----------------------------------------------------------------------===//
 // AnyQuantized
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index 96cdbaeef205f..290233d12137a 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Parser/Parser.h"
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -54,6 +55,12 @@ TEST(Bytecode, MultiModuleWithResource) {
       parseSourceString<Operation *>(ostream.str(), parseConfig);
   ASSERT_TRUE(roundTripModule);
 
+  // FIXME: Parsing external resources does not work on big-endian
+  // platforms currently.
+  if (llvm::support::endian::system_endianness() ==
+      llvm::support::endianness::big)
+    GTEST_SKIP();
+
   // Try to see if we have a valid resource in the parsed module.
   auto checkResourceAttribute = [&](Operation *op) {
     Attribute attr = roundTripModule->getAttr("bytecode.test");


        


More information about the Mlir-commits mailing list