[Mlir-commits] [mlir] [mlir] Make the split markers of splitAndProcessBuffer configurable. (PR #84765)

Ingo Müller llvmlistbot at llvm.org
Thu Mar 14 01:54:47 PDT 2024


https://github.com/ingomueller-net updated https://github.com/llvm/llvm-project/pull/84765

>From 04045be7ae5fb351488bc0856fbed6bda584059b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Mon, 11 Mar 2024 13:14:21 +0000
Subject: [PATCH 1/7] Make filt split marker ('// -----') a global constant.

This allows all tools to use that constant defined in `ToolUtilities.h`,
instead of copying that string.
---
 mlir/include/mlir/Support/ToolUtilities.h          | 2 ++
 mlir/lib/Support/ToolUtilities.cpp                 | 7 ++++---
 mlir/lib/Tools/lsp-server-support/Transport.cpp    | 3 ++-
 mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp      | 6 ++----
 mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp | 9 ++++-----
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index d2c89409c0653b..cd20661d2ce038 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -27,6 +27,8 @@ struct LogicalResult;
 using ChunkBufferHandler = function_ref<LogicalResult(
     std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
 
+extern inline const char *const kDefaultSplitMarker = "// -----";
+
 /// Splits the specified buffer on a marker (`// -----`), processes each chunk
 /// independently according to the normal `processChunkBuffer` logic, and writes
 /// all results to `os`.
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ee0214f3d8ac04..1c5dba6650726e 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -27,8 +27,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
   if (!enableSplitting)
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  const char splitMarkerConst[] = "// -----";
-  StringRef splitMarker(splitMarkerConst);
+  StringRef splitMarker(kDefaultSplitMarker);
   const int splitMarkerLen = splitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
@@ -89,7 +88,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput ? "\n// -----\n" : "");
+                   insertMarkerInOutput
+                       ? (llvm::Twine("\n") + kDefaultSplitMarker + "\n").str()
+                       : "");
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp
index df675cf78210be..64dea35614c070 100644
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Tools/lsp-server-support/Transport.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/Protocol.h"
 #include "llvm/ADT/SmallString.h"
@@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
     StringRef lineRef = line.str().trim();
     if (lineRef.starts_with("//")) {
       // Found a delimiter for the message.
-      if (lineRef == "// -----")
+      if (lineRef == kDefaultSplitMarker)
         break;
       continue;
     }
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
index de657a3df9ef7b..ed75b4a90536eb 100644
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Parser/Parser.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
   context.allowUnregisteredDialects();
 
   // Split the file into separate MLIR documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
       context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));
 
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index a5c6c2bb2c6a0c..d282ee8f61d8fe 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -10,6 +10,7 @@
 
 #include "Protocol.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/PDLL/AST/Context.h"
 #include "mlir/Tools/PDLL/AST/Nodes.h"
 #include "mlir/Tools/PDLL/AST/Types.h"
@@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
         [&](PDLTextFileChunk &chunk) {
           chunk.document.getPDLLViewOutput(outputOS, kind);
         },
-        [&] { outputOS << "\n// -----\n\n"; });
+        [&] { outputOS << "\n"
+                       << kDefaultSplitMarker << "\n\n"; });
   }
   return result;
 }
@@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
   chunks.clear();
 
   // Split the file into separate PDL documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
       /*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
       diagnostics));

>From ac9808d9b23a0145788e13f226e1cd7caf493c4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Mon, 11 Mar 2024 14:15:34 +0000
Subject: [PATCH 2/7] Make the split markers of splitAndProcessBuffer
 configurable.

---
 mlir/include/mlir/Support/ToolUtilities.h     | 15 ++++++----
 mlir/lib/Support/ToolUtilities.cpp            | 21 +++++++-------
 mlir/lib/Tools/mlir-opt/MlirOptMain.cpp       |  3 +-
 .../mlir-translate/MlirTranslateMain.cpp      | 11 +++++++-
 mlir/test/mlir-translate/split-markers.mlir   | 28 +++++++++++++++++++
 5 files changed, 58 insertions(+), 20 deletions(-)
 create mode 100644 mlir/test/mlir-translate/split-markers.mlir

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index cd20661d2ce038..c85c295824c714 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -15,6 +15,8 @@
 
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
 #include <memory>
 
 namespace llvm {
@@ -29,20 +31,21 @@ using ChunkBufferHandler = function_ref<LogicalResult(
 
 extern inline const char *const kDefaultSplitMarker = "// -----";
 
-/// Splits the specified buffer on a marker (`// -----`), processes each chunk
-/// independently according to the normal `processChunkBuffer` logic, and writes
-/// all results to `os`.
+/// Splits the specified buffer on a marker (`// -----` by default), processes
+/// each chunk independently according to the normal `processChunkBuffer` logic,
+/// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
 /// into a single file. `enableSplitting` can be used to toggle if splitting
 /// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
-/// between each of the processed output chunks.
+/// Output split markers (`//-----` by default) are placed between each of the
+/// processed output chunks.
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
                       bool enableSplitting = true,
-                      bool insertMarkerInOutput = false);
+                      llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+                      llvm::StringRef outputSplitMarker = kDefaultSplitMarker);
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_TOOLUTILITIES_H
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index 1c5dba6650726e..ec2ffe6ebb8de7 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -22,20 +22,20 @@ LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
                             raw_ostream &os, bool enableSplitting,
-                            bool insertMarkerInOutput) {
+                            llvm::StringRef inputSplitMarker,
+                            llvm::StringRef outputSplitMarker) {
   // If splitting is disabled, we process the full input buffer.
   if (!enableSplitting)
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  StringRef splitMarker(kDefaultSplitMarker);
-  const int splitMarkerLen = splitMarker.size();
+  const int inputSplitMarkerLen = inputSplitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
   SmallVector<StringRef, 8> rawSourceBuffers;
   const int checkLen = 2;
   // Split dropping the last checkLen chars to enable flagging near misses.
   origMemBuffer->getBuffer().split(rawSourceBuffers,
-                                   splitMarker.drop_back(checkLen));
+                                   inputSplitMarker.drop_back(checkLen));
   if (rawSourceBuffers.empty())
     return success();
 
@@ -57,8 +57,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
     }
 
     // Check that suffix is as expected and doesn't have any dash post.
-    bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
-                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    bool expectedSuffix =
+        buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
+        buffer.size() > checkLen && buffer[checkLen] != '0';
     if (expectedSuffix) {
       sourceBuffers.push_back(prev);
       prev = buffer.drop_front(checkLen);
@@ -68,8 +69,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                  llvm::SourceMgr::DK_Warning,
                                  "near miss with file split marker");
-      prev = StringRef(prev.data(),
-                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+      prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
+                                        checkLen + buffer.size());
     }
   }
   if (!prev.empty())
@@ -88,9 +89,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput
-                       ? (llvm::Twine("\n") + kDefaultSplitMarker + "\n").str()
-                       : "");
+                   (llvm::Twine(outputSplitMarker) + "\n").str());
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index b62557153b4167..20f45d11f2e403 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -533,8 +533,7 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                          threadPool);
   };
   return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile(),
-                               /*insertMarkerInOutput=*/true);
+                               config.shouldSplitInputFile());
 }
 
 LogicalResult mlir::MlirOptMain(int argc, char **argv,
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index 92adb8d6ac97c6..c2499d8c2e2cac 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -80,6 +80,14 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                      "(discouraged: testing only!)"),
       llvm::cl::init(false));
 
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "input-split-marker", llvm::cl::desc("Split marker to use for the input"),
+      llvm::cl::init(kDefaultSplitMarker));
+
+  static llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for the ouput"), llvm::cl::init(""));
+
   llvm::InitLLVM y(argc, argv);
 
   // Add flags for all the registered translations.
@@ -176,7 +184,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
   };
 
   if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile)))
+                                   output->os(), splitInputFile,
+                                   inputSplitMarker, outputSplitMarker)))
     return failure();
 
   output->keep();
diff --git a/mlir/test/mlir-translate/split-markers.mlir b/mlir/test/mlir-translate/split-markers.mlir
new file mode 100644
index 00000000000000..d9408ad0cea4d4
--- /dev/null
+++ b/mlir/test/mlir-translate/split-markers.mlir
@@ -0,0 +1,28 @@
+// Check that (1) the output split marker is inserted and (2) the default split
+// marker is used for the input.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
+
+// With the second command, check that (3) the input split marker is used and
+// (4) the output split marker is empty if not specified.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | mlir-translate -split-input-file -import-llvm \
+// RUN:   -input-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
+
+// CHECK-OUTPUT:      ModuleID
+// CHECK-OUTPUT:      ; -----
+// CHECK-OUTPUT-NEXT: ModuleID
+
+// CHECK-ROUNDTRIP:       module {{.*}} {
+// CHECK-ROUNDTRIP-NEXT:  }
+// CHECK-ROUNDTRIP-EMPTY:
+// CHECK-ROUNDTRIP:       module
+
+module {}
+
+// -----
+
+module {}

>From 6ba37c9a8545aa140f229d814086edf32681bad4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Tue, 12 Mar 2024 10:14:09 +0000
Subject: [PATCH 3/7] Merge `-input-split-marker` into `-split-input-file`.

In this new version, specifying `-split-input-file` without value uses
the default marker (`// -----`) implicitly, which splits the input on
that marker, whereas not specifying that flag uses the empty marker,
which does not split the input. This makes all previous usages of the
flag work as before (modulo the subtle new-line changes introduced in
the previous commit). A custom splitter can be used by providing a value
to the same flag (`-split-input-file="; -----"`).
---
 mlir/include/mlir/Support/ToolUtilities.h     |  9 +++---
 .../include/mlir/Tools/mlir-opt/MlirOptMain.h | 31 ++++++++++++++-----
 mlir/lib/Support/ToolUtilities.cpp            |  5 ++-
 mlir/lib/Tools/mlir-opt/MlirOptMain.cpp       | 23 ++++++++++----
 .../mlir-translate/MlirTranslateMain.cpp      | 26 +++++++++-------
 mlir/test/mlir-opt/nearmiss.mlir              | 15 ++++++++-
 mlir/test/mlir-translate/split-markers.mlir   | 15 ++++++---
 mlir/tools/mlir-pdll/mlir-pdll.cpp            |  9 +++---
 8 files changed, 90 insertions(+), 43 deletions(-)

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index c85c295824c714..73b7571fb56b7c 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -36,14 +36,13 @@ extern inline const char *const kDefaultSplitMarker = "// -----";
 /// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
-/// into a single file. `enableSplitting` can be used to toggle if splitting
-/// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// Output split markers (`//-----` by default) are placed between each of the
-/// processed output chunks.
+/// into a single file. The input split marker is configurable. If it is empty
+/// merging is disabled, which allows for merging split and non-split code
+/// paths. Output split markers (`//-----` by default) are placed between each
+/// of the processed output chunks.
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
-                      bool enableSplitting = true,
                       llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
                       llvm::StringRef outputSplitMarker = kDefaultSplitMarker);
 } // namespace mlir
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 6e90fad1618d21..8adc80908de116 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -15,6 +15,7 @@
 
 #include "mlir/Debug/CLOptionsSetup.h"
 #include "mlir/Support/LogicalResult.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "llvm/ADT/StringRef.h"
 
 #include <cstdlib>
@@ -136,13 +137,24 @@ class MlirOptMainConfig {
   }
   bool shouldShowDialects() const { return showDialectsFlag; }
 
-  /// Set whether to split the input file based on the `// -----` marker into
-  /// pieces and process each chunk independently.
-  MlirOptMainConfig &splitInputFile(bool split = true) {
-    splitInputFileFlag = split;
+  /// Set the marker on which to split the input into chunks and process each
+  /// chunk independently. Input is not split if empty.
+  MlirOptMainConfig &
+  splitInputFile(std::string splitMarker = kDefaultSplitMarker) {
+    splitInputFileFlag = std::move(splitMarker);
+    return *this;
+  }
+  bool shouldSplitInputFile() const { return splitInputFileFlag.empty(); }
+  StringRef inputSplitMarker() const { return splitInputFileFlag; }
+
+  /// Set whether to merge the output chunks into one file using the given
+  /// marker.
+  MlirOptMainConfig &
+  outputSplitMarker(std::string splitMarker = kDefaultSplitMarker) {
+    outputSplitMarkerFlag = std::move(splitMarker);
     return *this;
   }
-  bool shouldSplitInputFile() const { return splitInputFileFlag; }
+  StringRef outputSplitMarker() const { return outputSplitMarkerFlag; }
 
   /// Disable implicit addition of a top-level module op during parsing.
   MlirOptMainConfig &useExplicitModule(bool useExplicitModule) {
@@ -215,9 +227,12 @@ class MlirOptMainConfig {
   /// Show the registered dialects before trying to load the input file.
   bool showDialectsFlag = false;
 
-  /// Split the input file based on the `// -----` marker into pieces and
-  /// process each chunk independently.
-  bool splitInputFileFlag = false;
+  /// Split the input file based on the given marker into chunks and process
+  /// each chunk independently. Input is not split if empty.
+  std::string splitInputFileFlag = "";
+
+  /// Merge output chunks into one file using the given marker.
+  std::string outputSplitMarkerFlag = "";
 
   /// Use an explicit top-level module op during parsing.
   bool useExplicitModuleFlag = false;
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ec2ffe6ebb8de7..f05b4f5c643d50 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -21,11 +21,10 @@ using namespace mlir;
 LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
-                            raw_ostream &os, bool enableSplitting,
-                            llvm::StringRef inputSplitMarker,
+                            raw_ostream &os, llvm::StringRef inputSplitMarker,
                             llvm::StringRef outputSplitMarker) {
   // If splitting is disabled, we process the full input buffer.
-  if (!enableSplitting)
+  if (inputSplitMarker.empty())
     return processChunkBuffer(std::move(originalBuffer), os);
 
   const int inputSplitMarkerLen = inputSplitMarker.size();
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index 20f45d11f2e403..da775f1b3051b3 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -127,11 +127,21 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
         cl::desc("Print the list of registered dialects and exit"),
         cl::location(showDialectsFlag), cl::init(false));
 
-    static cl::opt<bool, /*ExternalStorage=*/true> splitInputFile(
-        "split-input-file",
-        cl::desc("Split the input file into pieces and process each "
-                 "chunk independently"),
-        cl::location(splitInputFileFlag), cl::init(false));
+    static cl::opt<std::string, /*ExternalStorage=*/true> splitInputFile(
+        "split-input-file", llvm::cl::ValueOptional,
+        cl::callback([&](const std::string &str) {
+          // Implicit value: use default marker if flag was used without value.
+          if (str.empty())
+            splitInputFile.setValue(kDefaultSplitMarker);
+        }),
+        cl::desc("Split the input file into chunks using the given or "
+                 "default marker and process each chunk independently"),
+        cl::location(splitInputFileFlag), cl::init(""));
+
+    static cl::opt<std::string, /*ExternalStorage=*/true> outputSplitMarker(
+        "output-split-marker",
+        cl::desc("Split marker to use for merging the ouput"),
+        cl::location(outputSplitMarkerFlag), cl::init(kDefaultSplitMarker));
 
     static cl::opt<bool, /*ExternalStorage=*/true> verifyDiagnostics(
         "verify-diagnostics",
@@ -533,7 +543,8 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                          threadPool);
   };
   return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile());
+                               config.inputSplitMarker(),
+                               config.outputSplitMarker());
 }
 
 LogicalResult mlir::MlirOptMain(int argc, char **argv,
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index c2499d8c2e2cac..1aaf8adb50a7a5 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -62,11 +62,16 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
       llvm::cl::desc("Allow operation with no registered dialects (discouraged: testing only!)"),
       llvm::cl::init(false));
 
-  static llvm::cl::opt<bool> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and "
-                     "process each chunk independently"),
-      llvm::cl::init(false));
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));
 
   static llvm::cl::opt<bool> verifyDiagnostics(
       "verify-diagnostics",
@@ -80,13 +85,10 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                      "(discouraged: testing only!)"),
       llvm::cl::init(false));
 
-  static llvm::cl::opt<std::string> inputSplitMarker(
-      "input-split-marker", llvm::cl::desc("Split marker to use for the input"),
-      llvm::cl::init(kDefaultSplitMarker));
-
   static llvm::cl::opt<std::string> outputSplitMarker(
       "output-split-marker",
-      llvm::cl::desc("Split marker to use for the ouput"), llvm::cl::init(""));
+      llvm::cl::desc("Split marker to use for merging the ouput"),
+      llvm::cl::init(""));
 
   llvm::InitLLVM y(argc, argv);
 
@@ -184,8 +186,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
   };
 
   if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile,
-                                   inputSplitMarker, outputSplitMarker)))
+                                   output->os(), inputSplitMarker,
+                                   outputSplitMarker)))
     return failure();
 
   output->keep();
diff --git a/mlir/test/mlir-opt/nearmiss.mlir b/mlir/test/mlir-opt/nearmiss.mlir
index 2f695517eca068..8011f5202b3f79 100644
--- a/mlir/test/mlir-opt/nearmiss.mlir
+++ b/mlir/test/mlir-opt/nearmiss.mlir
@@ -1,6 +1,13 @@
-// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t &&  FileCheck --input-file %t %s
+// Check near-miss mechanics:
+// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t \
+// RUN: &&  FileCheck --input-file %t %s
 // RUN: cat %t
 
+// Check that (1) custom input splitter and (2) custom output splitters work.
+// RUN: mlir-opt %s -split-input-file="// CHECK: ""----" \
+// RUN:   -output-split-marker="// ---- next split ----" \
+// RUN: | FileCheck -input-file %s -check-prefix=CHECK-SPLITTERS %s
+
 func.func @main() {return}
 
 // -----
@@ -20,3 +27,9 @@ func.func @bar2() {return }
 
 // No error flagged at the end for a near miss.
 // ----
+
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
+// CHECK-SPLITTERS: ---- next split ----
+// CHECK-SPLITTERS: module
diff --git a/mlir/test/mlir-translate/split-markers.mlir b/mlir/test/mlir-translate/split-markers.mlir
index d9408ad0cea4d4..ed576bcd852360 100644
--- a/mlir/test/mlir-translate/split-markers.mlir
+++ b/mlir/test/mlir-translate/split-markers.mlir
@@ -1,17 +1,21 @@
-// Check that (1) the output split marker is inserted and (2) the default split
-// marker is used for the input.
+// Check that (1) the output split marker is inserted and (2) the input file is
+// split using the default split marker.
 // RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
 // RUN:   -output-split-marker="; -----" \
 // RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
 
 // With the second command, check that (3) the input split marker is used and
 // (4) the output split marker is empty if not specified.
-// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN: mlir-translate %s -split-input-file="// ""-----" -mlir-to-llvmir \
 // RUN:   -output-split-marker="; -----" \
 // RUN: | mlir-translate -split-input-file -import-llvm \
-// RUN:   -input-split-marker="; -----" \
+// RUN:   -split-input-file="; -----" \
 // RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
 
+// Check that (5) the input is not split if `-split-input-file` is not given.
+// RUN: mlir-translate %s -mlir-to-llvmir \
+// RUN: | FileCheck -check-prefix=CHECK-NOSPLIT %s
+
 // CHECK-OUTPUT:      ModuleID
 // CHECK-OUTPUT:      ; -----
 // CHECK-OUTPUT-NEXT: ModuleID
@@ -21,6 +25,9 @@
 // CHECK-ROUNDTRIP-EMPTY:
 // CHECK-ROUNDTRIP:       module
 
+// CHECK-NOSPLIT:     ModuleID
+// CHECK-NOSPLIT-NOT: ModuleID
+
 module {}
 
 // -----
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index 43e4aa50642403..6d46f38581e965 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -136,11 +136,12 @@ int main(int argc, char **argv) {
       llvm::cl::desc(
           "Print out the parsed ODS information from the input file"),
       llvm::cl::init(false));
-  llvm::cl::opt<bool> splitInputFile(
+  llvm::cl::opt<std::string> splitInputFile(
       "split-input-file",
-      llvm::cl::desc("Split the input file into pieces and process each "
-                     "chunk independently"),
-      llvm::cl::init(false));
+      llvm::cl::desc(
+          "Split the input file into chunks using the given marker and "
+          "process each chunk independently"),
+      llvm::cl::init(kDefaultSplitMarker));
   llvm::cl::opt<enum OutputType> outputType(
       "x", llvm::cl::init(OutputType::AST),
       llvm::cl::desc("The type of output desired"),

>From 646449e90dd41f2fbc9b06600b4b059045fe3dbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Wed, 13 Mar 2024 11:03:59 +0000
Subject: [PATCH 4/7] Fix CI: Add full CL parsing logic to mlir-pdll.

I have forgotten to add the extended logic from the previous commit to
that tool. This commit fixes that and adds a corresponding test.
---
 mlir/test/mlir-pdll/split-markers.pdll | 36 ++++++++++++++++++++++++++
 mlir/tools/mlir-pdll/mlir-pdll.cpp     | 20 +++++++++-----
 2 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 mlir/test/mlir-pdll/split-markers.pdll

diff --git a/mlir/test/mlir-pdll/split-markers.pdll b/mlir/test/mlir-pdll/split-markers.pdll
new file mode 100644
index 00000000000000..45e409a8383695
--- /dev/null
+++ b/mlir/test/mlir-pdll/split-markers.pdll
@@ -0,0 +1,36 @@
+// Check that (1) the default input split marker used if no custom marker is
+// specified and (2) the output file is merged using the default marker.
+// RUN: mlir-pdll %s -split-input-file \
+// RUN: | FileCheck -check-prefix=CHECK-DEFAULT %s
+
+// Check that the custom (3) input and (output) split markers are used if
+// provided.
+// RUN: mlir-pdll %s \
+// RUN:   -split-input-file="// ""=====" -output-split-marker "// #####" \
+// RUN: | FileCheck -check-prefix=CHECK-CUSTOM %s
+
+// CHECK-DEFAULT:      Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT-NOT:  PatternDecl
+// CHECK-DEFAULT:      //{{ }}-----
+// CHECK-DEFAULT-NEXT: Module
+// CHECK-DEFAULT-NEXT: PatternDecl
+// CHECK-DEFAULT:      PatternDecl
+
+// CHECK-CUSTOM:      Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM:      PatternDecl
+// CHECK-CUSTOM:      // #####
+// CHECK-CUSTOM-NEXT: Module
+// CHECK-CUSTOM-NEXT: PatternDecl
+// CHECK-CUSTOM-NOT:  PatternDecl
+
+Pattern => erase op<test.op>;
+
+// -----
+
+Pattern => erase op<test.op2>;
+
+// =====
+
+Pattern => erase op<test.op3>;
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index 6d46f38581e965..d312765e40b029 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -136,11 +136,19 @@ int main(int argc, char **argv) {
       llvm::cl::desc(
           "Print out the parsed ODS information from the input file"),
       llvm::cl::init(false));
-  llvm::cl::opt<std::string> splitInputFile(
-      "split-input-file",
-      llvm::cl::desc(
-          "Split the input file into chunks using the given marker and "
-          "process each chunk independently"),
+  llvm::cl::opt<std::string> inputSplitMarker(
+      "split-input-file", llvm::cl::ValueOptional,
+      llvm::cl::callback([&](const std::string &str) {
+        // Implicit value: use default marker if flag was used without value.
+        if (str.empty())
+          inputSplitMarker.setValue(kDefaultSplitMarker);
+      }),
+      llvm::cl::desc("Split the input file into chunks using the given or "
+                     "default marker and process each chunk independently"),
+      llvm::cl::init(""));
+  llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for merging the ouput"),
       llvm::cl::init(kDefaultSplitMarker));
   llvm::cl::opt<enum OutputType> outputType(
       "x", llvm::cl::init(OutputType::AST),
@@ -188,7 +196,7 @@ int main(int argc, char **argv) {
                          dumpODS, includedFiles);
   };
   if (failed(splitAndProcessBuffer(std::move(inputFile), processFn, outputStrOS,
-                                   splitInputFile)))
+                                   inputSplitMarker, outputSplitMarker)))
     return 1;
 
   // Write the output.

>From 94bf1355708a2dabb4a1747d15d34e73d2befaa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Wed, 13 Mar 2024 12:02:55 +0000
Subject: [PATCH 5/7] Make default for outputSplitMarker match previous default
 behavior.

---
 mlir/include/mlir/Support/ToolUtilities.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index 73b7571fb56b7c..3abbc3bc4b0513 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -44,7 +44,7 @@ LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
                       llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
-                      llvm::StringRef outputSplitMarker = kDefaultSplitMarker);
+                      llvm::StringRef outputSplitMarker = "");
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_TOOLUTILITIES_H

>From 9a079fa7f15aff0bf43f424d6d92076978b1038d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Wed, 13 Mar 2024 12:12:57 +0000
Subject: [PATCH 6/7] Document insertion of the new line character inserted by
 splitAndProcessBuffer.

---
 mlir/include/mlir/Support/ToolUtilities.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index 3abbc3bc4b0513..511cb118bb2467 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -36,10 +36,12 @@ extern inline const char *const kDefaultSplitMarker = "// -----";
 /// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
-/// into a single file. The input split marker is configurable. If it is empty
+/// into a single file. The input split marker is configurable. If it is empty,
 /// merging is disabled, which allows for merging split and non-split code
-/// paths. Output split markers (`//-----` by default) are placed between each
-/// of the processed output chunks.
+/// paths. Output split markers (`//-----` by default) followed by a new line
+/// character, respectively, are placed between each of the processed output
+/// chunks. (The new line character is inserted even if the split marker is
+/// empty.)
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,

>From b0074ba2463505ecc4288c9cade93226ed8d6eca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingo=20M=C3=BCller?= <ingomueller at google.com>
Date: Thu, 14 Mar 2024 08:54:29 +0000
Subject: [PATCH 7/7] Rename test/mlir-opt/{nearmiss,split-markers}.mlir.

---
 mlir/test/mlir-opt/{nearmiss.mlir => split-markers.mlir} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename mlir/test/mlir-opt/{nearmiss.mlir => split-markers.mlir} (100%)

diff --git a/mlir/test/mlir-opt/nearmiss.mlir b/mlir/test/mlir-opt/split-markers.mlir
similarity index 100%
rename from mlir/test/mlir-opt/nearmiss.mlir
rename to mlir/test/mlir-opt/split-markers.mlir



More information about the Mlir-commits mailing list