[Mlir-commits] [mlir] [mlir] Make the split markers of splitAndProcessBuffer configurable. (PR #84765)

Ingo Müller llvmlistbot at llvm.org
Mon Mar 11 07:39:00 PDT 2024


https://github.com/ingomueller-net created https://github.com/llvm/llvm-project/pull/84765

This allows to define custom splitters, which is interesting for non-MLIR inputs and outputs to `mlir-translate`. For example, one way use `; -----` as a splitter of `.ll` files. The splitters now passes as arguments into `splitAndProcessBuffer`, both of which default to MLIR's default of `// -----`. The behavior of the input split marker should not change at all; however, outputs now have one new line *more* than before if there is no splitter (old: `insertMarkerInOutput = false`, new: `outputSplitMarker = ""`) and one new line *less* if there is one. These parameters are exposed as command line options of `mlir-translate` as `input-split-marker` and `output-split-marker`, which default to `"// -----"` and `""` (without quotes), respectively, which exactly corresponds to the previous behavior (module the new lines mentioned before).

>From 04045be7ae5fb351488bc0856fbed6bda584059b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Mon, 11 Mar 2024 13:14:21 +0000
Subject: [PATCH 1/2] Make filt split marker ('// -----') a global constant.

This allows all tools to use that constant defined in `ToolUtilities.h`,
instead of copying that string.
---
 mlir/include/mlir/Support/ToolUtilities.h          | 2 ++
 mlir/lib/Support/ToolUtilities.cpp                 | 7 ++++---
 mlir/lib/Tools/lsp-server-support/Transport.cpp    | 3 ++-
 mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp      | 6 ++----
 mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 9 ++++-----
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index d2c89409c0653b..cd20661d2ce038 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -27,6 +27,8 @@ struct LogicalResult;
 using ChunkBufferHandler = function_ref<LogicalResult(
     std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
 
+extern inline const char *const kDefaultSplitMarker = "// -----";
+
 /// Splits the specified buffer on a marker (`// -----`), processes each chunk
 /// independently according to the normal `processChunkBuffer` logic, and writes
 /// all results to `os`.
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ee0214f3d8ac04..1c5dba6650726e 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -27,8 +27,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
   if (!enableSplitting)
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  const char splitMarkerConst[] = "// -----";
-  StringRef splitMarker(splitMarkerConst);
+  StringRef splitMarker(kDefaultSplitMarker);
   const int splitMarkerLen = splitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
@@ -89,7 +88,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput ? "\n// -----\n" : "");
+                   insertMarkerInOutput
+                       ? (llvm::Twine("\n") + kDefaultSplitMarker + "\n").str()
+                       : "");
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp
index df675cf78210be..64dea35614c070 100644
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Tools/lsp-server-support/Transport.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/Protocol.h"
 #include "llvm/ADT/SmallString.h"
@@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
     StringRef lineRef = line.str().trim();
     if (lineRef.starts_with("//")) {
       // Found a delimiter for the message.
-      if (lineRef == "// -----")
+      if (lineRef == kDefaultSplitMarker)
         break;
       continue;
     }
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
index de657a3df9ef7b..ed75b4a90536eb 100644
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Parser/Parser.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
   context.allowUnregisteredDialects();
 
   // Split the file into separate MLIR documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
       context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));
 
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index a5c6c2bb2c6a0c..d282ee8f61d8fe 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -10,6 +10,7 @@
 
 #include "Protocol.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/PDLL/AST/Context.h"
 #include "mlir/Tools/PDLL/AST/Nodes.h"
 #include "mlir/Tools/PDLL/AST/Types.h"
@@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
         [&](PDLTextFileChunk &chunk) {
           chunk.document.getPDLLViewOutput(outputOS, kind);
         },
-        [&] { outputOS << "\n// -----\n\n"; });
+        [&] { outputOS << "\n"
+                       << kDefaultSplitMarker << "\n\n"; });
   }
   return result;
 }
@@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
   chunks.clear();
 
   // Split the file into separate PDL documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
       /*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
       diagnostics));

>From ac9808d9b23a0145788e13f226e1cd7caf493c4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Mon, 11 Mar 2024 14:15:34 +0000
Subject: [PATCH 2/2] Make the split markers of splitAndProcessBuffer
 configurable.

---
 mlir/include/mlir/Support/ToolUtilities.h     | 15 ++++++----
 mlir/lib/Support/ToolUtilities.cpp            | 21 +++++++-------
 mlir/lib/Tools/mlir-opt/MlirOptMain.cpp       |  3 +-
 .../mlir-translate/MlirTranslateMain.cpp      | 11 +++++++-
 mlir/test/mlir-translate/split-markers.mlir   | 28 +++++++++++++++++++
 5 files changed, 58 insertions(+), 20 deletions(-)
 create mode 100644 mlir/test/mlir-translate/split-markers.mlir

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index cd20661d2ce038..c85c295824c714 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -15,6 +15,8 @@
 
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
 #include <memory>
 
 namespace llvm {
@@ -29,20 +31,21 @@ using ChunkBufferHandler = function_ref<LogicalResult(
 
 extern inline const char *const kDefaultSplitMarker = "// -----";
 
-/// Splits the specified buffer on a marker (`// -----`), processes each chunk
-/// independently according to the normal `processChunkBuffer` logic, and writes
-/// all results to `os`.
+/// Splits the specified buffer on a marker (`// -----` by default), processes
+/// each chunk independently according to the normal `processChunkBuffer` logic,
+/// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
 /// into a single file. `enableSplitting` can be used to toggle if splitting
 /// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
-/// between each of the processed output chunks.
+/// Output split markers (`//-----` by default) are placed between each of the
+/// processed output chunks.
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
                       bool enableSplitting = true,
-                      bool insertMarkerInOutput = false);
+                      llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+                      llvm::StringRef outputSplitMarker = kDefaultSplitMarker);
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_TOOLUTILITIES_H
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index 1c5dba6650726e..ec2ffe6ebb8de7 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -22,20 +22,20 @@ LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
                             raw_ostream &os, bool enableSplitting,
-                            bool insertMarkerInOutput) {
+                            llvm::StringRef inputSplitMarker,
+                            llvm::StringRef outputSplitMarker) {
   // If splitting is disabled, we process the full input buffer.
   if (!enableSplitting)
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  StringRef splitMarker(kDefaultSplitMarker);
-  const int splitMarkerLen = splitMarker.size();
+  const int inputSplitMarkerLen = inputSplitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
   SmallVector<StringRef, 8> rawSourceBuffers;
   const int checkLen = 2;
   // Split dropping the last checkLen chars to enable flagging near misses.
   origMemBuffer->getBuffer().split(rawSourceBuffers,
-                                   splitMarker.drop_back(checkLen));
+                                   inputSplitMarker.drop_back(checkLen));
   if (rawSourceBuffers.empty())
     return success();
 
@@ -57,8 +57,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
     }
 
     // Check that suffix is as expected and doesn't have any dash post.
-    bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
-                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    bool expectedSuffix =
+        buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
+        buffer.size() > checkLen && buffer[checkLen] != '0';
     if (expectedSuffix) {
       sourceBuffers.push_back(prev);
       prev = buffer.drop_front(checkLen);
@@ -68,8 +69,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                  llvm::SourceMgr::DK_Warning,
                                  "near miss with file split marker");
-      prev = StringRef(prev.data(),
-                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+      prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
+                                        checkLen + buffer.size());
     }
   }
   if (!prev.empty())
@@ -88,9 +89,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput
-                       ? (llvm::Twine("\n") + kDefaultSplitMarker + "\n").str()
-                       : "");
+                   (llvm::Twine(outputSplitMarker) + "\n").str());
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index b62557153b4167..20f45d11f2e403 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -533,8 +533,7 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                          threadPool);
   };
   return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile(),
-                               /*insertMarkerInOutput=*/true);
+                               config.shouldSplitInputFile());
 }
 
 LogicalResult mlir::MlirOptMain(int argc, char **argv,
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index 92adb8d6ac97c6..c2499d8c2e2cac 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -80,6 +80,14 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                      "(discouraged: testing only!)"),
       llvm::cl::init(false));
 
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "input-split-marker", llvm::cl::desc("Split marker to use for the input"),
+      llvm::cl::init(kDefaultSplitMarker));
+
+  static llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for the ouput"), llvm::cl::init(""));
+
   llvm::InitLLVM y(argc, argv);
 
   // Add flags for all the registered translations.
@@ -176,7 +184,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
   };
 
   if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile)))
+                                   output->os(), splitInputFile,
+                                   inputSplitMarker, outputSplitMarker)))
     return failure();
 
   output->keep();
diff --git a/mlir/test/mlir-translate/split-markers.mlir b/mlir/test/mlir-translate/split-markers.mlir
new file mode 100644
index 00000000000000..d9408ad0cea4d4
--- /dev/null
+++ b/mlir/test/mlir-translate/split-markers.mlir
@@ -0,0 +1,28 @@
+// Check that (1) the output split marker is inserted and (2) the default split
+// marker is used for the input.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
+
+// With the second command, check that (3) the input split marker is used and
+// (4) the output split marker is empty if not specified.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | mlir-translate -split-input-file -import-llvm \
+// RUN:   -input-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
+
+// CHECK-OUTPUT:      ModuleID
+// CHECK-OUTPUT:      ; -----
+// CHECK-OUTPUT-NEXT: ModuleID
+
+// CHECK-ROUNDTRIP:       module {{.*}} {
+// CHECK-ROUNDTRIP-NEXT:  }
+// CHECK-ROUNDTRIP-EMPTY:
+// CHECK-ROUNDTRIP:       module
+
+module {}
+
+// -----
+
+module {}



More information about the Mlir-commits mailing list