[Mlir-commits] [mlir] bf87d5a - [MLIR][Parser] Add `parseBase64Bytes`.

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Nov 18 08:13:36 PST 2022


Author: bzcheeseman
Date: 2022-11-18T08:13:30-08:00
New Revision: bf87d5ad8207b6002d0ff247f4803698be35950d

URL: https://github.com/llvm/llvm-project/commit/bf87d5ad8207b6002d0ff247f4803698be35950d
DIFF: https://github.com/llvm/llvm-project/commit/bf87d5ad8207b6002d0ff247f4803698be35950d.diff

LOG: [MLIR][Parser] Add `parseBase64Bytes`.

This patch adds `parseBase64Bytes` to the parser. It attempts to avoid double-allocating the buffer by re-using the token's spelling directly and eliding the quotes if they exist. It also avoids extra allocations by using std::vector<char> in the API - something we should change when the llvm::decodeBase64 API changes.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D138090

Added: 
    

Modified: 
    mlir/include/mlir/IR/OpImplementation.h
    mlir/lib/AsmParser/AsmParserImpl.h
    mlir/test/IR/parser.mlir
    mlir/test/lib/Dialect/Test/TestDialect.cpp
    mlir/test/lib/Dialect/Test/TestOps.td

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index 0c74af3508aef..c6e5abd904fe9 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -577,6 +577,9 @@ class AsmParser {
   /// Parse a quoted string token if present.
   virtual ParseResult parseOptionalString(std::string *string) = 0;
 
+  /// Parses a Base64 encoded string token, decoding into `bytes` if non-null.
+  virtual ParseResult parseBase64Bytes(std::vector<char> *bytes) = 0;
+
   /// Parse a `(` token.
   virtual ParseResult parseLParen() = 0;
 

diff  --git a/mlir/lib/AsmParser/AsmParserImpl.h b/mlir/lib/AsmParser/AsmParserImpl.h
index d7e8a55089d33..cf9b774b4d6e0 100644
--- a/mlir/lib/AsmParser/AsmParserImpl.h
+++ b/mlir/lib/AsmParser/AsmParserImpl.h
@@ -13,6 +13,7 @@
 #include "mlir/AsmParser/AsmParserState.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/OpImplementation.h"
+#include "llvm/Support/Base64.h"
 
 namespace mlir {
 namespace detail {
@@ -245,6 +246,28 @@ class AsmParserImpl : public BaseT {
     return success();
   }
 
+  /// Parses a Base64 encoded string token, decoding it into `bytes` when it is
+  /// non-null, and consumes the token on success.
+  ParseResult parseBase64Bytes(std::vector<char> *bytes) override {
+    auto tokenLoc = getCurrentLocation();
+    if (!parser.getToken().is(Token::string))
+      return emitError(tokenLoc, "expected string");
+
+    // A null `bytes` means the caller only wants the token consumed, so the
+    // payload is neither decoded nor validated in that case.
+    if (bytes) {
+      // decodeBase64 only reads its input, so decode straight from the token
+      // spelling. Quotes and whitespace are not part of the standard Base64
+      // alphabet, so stripping them from both ends cannot drop payload.
+      StringRef payload = parser.getTokenSpelling().trim("\" \t\n\v\f\r");
+      if (auto decodeErr = llvm::decodeBase64(payload, *bytes))
+        return emitError(tokenLoc, toString(std::move(decodeErr)));
+    }
+
+    parser.consumeToken();
+    return success();
+  }
+
   /// Parse a floating point value from the stream.
   ParseResult parseFloat(double &result) override {
     bool isNegative = parser.consumeIf(Token::minus);

diff  --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
index 82b3bd6109ad8..75ebc1a7536c3 100644
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -1185,6 +1185,13 @@ func.func @parse_wrapped_keyword_test() {
   return
 }
 
+// CHECK-LABEL: func @parse_base64_test
+func.func @parse_base64_test() {
+  // CHECK: test.parse_b64 "hello world"
+  test.parse_b64 "aGVsbG8gd29ybGQ=" // Base64 encoding of "hello world".
+  return
+}
+
 // CHECK-LABEL: func @"\22_string_symbol_reference\22"
 func.func @"\"_string_symbol_reference\""() {
   // CHECK: ref = @"\22_string_symbol_reference\22"

diff  --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index c9d50c44d9dd8..b560df92ba98a 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -862,6 +862,25 @@ ParseResult ParseWrappedKeywordOp::parse(OpAsmParser &parser,
 
 void ParseWrappedKeywordOp::print(OpAsmPrinter &p) { p << " " << getKeyword(); }
 
+/// Parses `test.parse_b64 "<base64>"` and stores the decoded bytes in the
+/// `b64` string attribute. Fails if the Base64 string cannot be parsed.
+ParseResult ParseB64BytesOp::parse(OpAsmParser &parser,
+                                   OperationState &result) {
+  std::vector<char> bytes;
+  if (parser.parseBase64Bytes(&bytes))
+    return failure();
+  // A payload such as `""` can leave `bytes` empty, and front() on an empty
+  // vector is undefined behavior; data() is well-defined for any size.
+  result.addAttribute("b64", parser.getBuilder().getStringAttr(
+                                 StringRef(bytes.data(), bytes.size())));
+  return success();
+}
+
+void ParseB64BytesOp::print(OpAsmPrinter &p) {
+  // Print the decoded bytes, not the Base64 text, so tests can check decoding.
+  p << " \"" << getB64() << "\"";
+}
+
 //===----------------------------------------------------------------------===//
 // Test WrapRegionOp - wrapping op exercising `parseGenericOperation()`.
 

diff  --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index cd447d7fbe97b..660ce7d05e1a8 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -1766,6 +1766,11 @@ def ParseWrappedKeywordOp : TEST_Op<"parse_wrapped_keyword"> {
   let hasCustomAssemblyFormat = 1;
 }
 
+def ParseB64BytesOp : TEST_Op<"parse_b64"> {
+  let arguments = (ins StrAttr:$b64); // Holds the decoded bytes, not Base64.
+  let hasCustomAssemblyFormat = 1; // parse()/print() are in TestDialect.cpp.
+}
+
 //===----------------------------------------------------------------------===//
 // Test region argument list parsing.
 


        


More information about the Mlir-commits mailing list