[Mlir-commits] [mlir] [mlir] Make the split markers of splitAndProcessBuffer configurable. (PR #84765)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Mon Mar 11 07:39:30 PDT 2024


Ingo =?utf-8?q?Müller?= <ingomueller at google.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/84765 at github.com>


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-core

Author: Ingo Müller (ingomueller-net)

<details>
<summary>Changes</summary>

This allows to define custom splitters, which is interesting for non-MLIR inputs and outputs to `mlir-translate`. For example, one way use `; -----` as a splitter of `.ll` files. The splitters now passes as arguments into `splitAndProcessBuffer`, both of which default to MLIR's default of `// -----`. The behavior of the input split marker should not change at all; however, outputs now have one new line *more* than before if there is no splitter (old: `insertMarkerInOutput = false`, new: `outputSplitMarker = ""`) and one new line *less* if there is one. These parameters are exposed as command line options of `mlir-translate` as `input-split-marker` and `output-split-marker`, which default to `"// -----"` and `""` (without quotes), respectively, which exactly corresponds to the previous behavior (module the new lines mentioned before).

---
Full diff: https://github.com/llvm/llvm-project/pull/84765.diff


8 Files Affected:

- (modified) mlir/include/mlir/Support/ToolUtilities.h (+11-6) 
- (modified) mlir/lib/Support/ToolUtilities.cpp (+10-10) 
- (modified) mlir/lib/Tools/lsp-server-support/Transport.cpp (+2-1) 
- (modified) mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp (+2-4) 
- (modified) mlir/lib/Tools/mlir-opt/MlirOptMain.cpp (+1-2) 
- (modified) mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp (+4-5) 
- (modified) mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp (+10-1) 
- (added) mlir/test/mlir-translate/split-markers.mlir (+28) 


``````````diff
diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index d2c89409c0653b..c85c295824c714 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -15,6 +15,8 @@
 
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
 #include <memory>
 
 namespace llvm {
@@ -27,20 +29,23 @@ struct LogicalResult;
 using ChunkBufferHandler = function_ref<LogicalResult(
     std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
 
-/// Splits the specified buffer on a marker (`// -----`), processes each chunk
-/// independently according to the normal `processChunkBuffer` logic, and writes
-/// all results to `os`.
+extern inline const char *const kDefaultSplitMarker = "// -----";
+
+/// Splits the specified buffer on a marker (`// -----` by default), processes
+/// each chunk independently according to the normal `processChunkBuffer` logic,
+/// and writes all results to `os`.
 ///
 /// This is used to allow a large number of small independent tests to be put
 /// into a single file. `enableSplitting` can be used to toggle if splitting
 /// should be enabled, e.g. to allow for merging split and non-split code paths.
-/// When `insertMarkerInOutput` is true, split markers (`//-----`) are placed
-/// between each of the processed output chunks.
+/// Output split markers (`//-----` by default) are placed between each of the
+/// processed output chunks.
 LogicalResult
 splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                       ChunkBufferHandler processChunkBuffer, raw_ostream &os,
                       bool enableSplitting = true,
-                      bool insertMarkerInOutput = false);
+                      llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+                      llvm::StringRef outputSplitMarker = kDefaultSplitMarker);
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_TOOLUTILITIES_H
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ee0214f3d8ac04..ec2ffe6ebb8de7 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -22,21 +22,20 @@ LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
                             raw_ostream &os, bool enableSplitting,
-                            bool insertMarkerInOutput) {
+                            llvm::StringRef inputSplitMarker,
+                            llvm::StringRef outputSplitMarker) {
   // If splitting is disabled, we process the full input buffer.
   if (!enableSplitting)
     return processChunkBuffer(std::move(originalBuffer), os);
 
-  const char splitMarkerConst[] = "// -----";
-  StringRef splitMarker(splitMarkerConst);
-  const int splitMarkerLen = splitMarker.size();
+  const int inputSplitMarkerLen = inputSplitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
   SmallVector<StringRef, 8> rawSourceBuffers;
   const int checkLen = 2;
   // Split dropping the last checkLen chars to enable flagging near misses.
   origMemBuffer->getBuffer().split(rawSourceBuffers,
-                                   splitMarker.drop_back(checkLen));
+                                   inputSplitMarker.drop_back(checkLen));
   if (rawSourceBuffers.empty())
     return success();
 
@@ -58,8 +57,9 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
     }
 
     // Check that suffix is as expected and doesn't have any dash post.
-    bool expectedSuffix = buffer.starts_with(splitMarker.take_back(checkLen)) &&
-                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    bool expectedSuffix =
+        buffer.starts_with(inputSplitMarker.take_back(checkLen)) &&
+        buffer.size() > checkLen && buffer[checkLen] != '0';
     if (expectedSuffix) {
       sourceBuffers.push_back(prev);
       prev = buffer.drop_front(checkLen);
@@ -69,8 +69,8 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                  llvm::SourceMgr::DK_Warning,
                                  "near miss with file split marker");
-      prev = StringRef(prev.data(),
-                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+      prev = StringRef(prev.data(), prev.size() + inputSplitMarkerLen -
+                                        checkLen + buffer.size());
     }
   }
   if (!prev.empty())
@@ -89,7 +89,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       hadFailure = true;
   };
   llvm::interleave(sourceBuffers, os, interleaveFn,
-                   insertMarkerInOutput ? "\n// -----\n" : "");
+                   (llvm::Twine(outputSplitMarker) + "\n").str());
 
   // If any fails, then return a failure of the tool.
   return failure(hadFailure);
diff --git a/mlir/lib/Tools/lsp-server-support/Transport.cpp b/mlir/lib/Tools/lsp-server-support/Transport.cpp
index df675cf78210be..64dea35614c070 100644
--- a/mlir/lib/Tools/lsp-server-support/Transport.cpp
+++ b/mlir/lib/Tools/lsp-server-support/Transport.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Tools/lsp-server-support/Transport.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/Protocol.h"
 #include "llvm/ADT/SmallString.h"
@@ -347,7 +348,7 @@ LogicalResult JSONTransport::readDelimitedMessage(std::string &json) {
     StringRef lineRef = line.str().trim();
     if (lineRef.starts_with("//")) {
       // Found a delimiter for the message.
-      if (lineRef == "// -----")
+      if (lineRef == kDefaultSplitMarker)
         break;
       continue;
     }
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
index de657a3df9ef7b..ed75b4a90536eb 100644
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Parser/Parser.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/lsp-server-support/Logging.h"
 #include "mlir/Tools/lsp-server-support/SourceMgrUtils.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1052,11 +1053,8 @@ MLIRTextFile::MLIRTextFile(const lsp::URIForFile &uri, StringRef fileContents,
   context.allowUnregisteredDialects();
 
   // Split the file into separate MLIR documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<MLIRTextFileChunk>(
       context, /*lineOffset=*/0, uri, subContents.front(), diagnostics));
 
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index b62557153b4167..20f45d11f2e403 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -533,8 +533,7 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
                          threadPool);
   };
   return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
-                               config.shouldSplitInputFile(),
-                               /*insertMarkerInOutput=*/true);
+                               config.shouldSplitInputFile());
 }
 
 LogicalResult mlir::MlirOptMain(int argc, char **argv,
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index a5c6c2bb2c6a0c..d282ee8f61d8fe 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -10,6 +10,7 @@
 
 #include "Protocol.h"
 #include "mlir/IR/BuiltinOps.h"
+#include "mlir/Support/ToolUtilities.h"
 #include "mlir/Tools/PDLL/AST/Context.h"
 #include "mlir/Tools/PDLL/AST/Nodes.h"
 #include "mlir/Tools/PDLL/AST/Types.h"
@@ -1621,7 +1622,8 @@ PDLTextFile::getPDLLViewOutput(lsp::PDLLViewOutputKind kind) {
         [&](PDLTextFileChunk &chunk) {
           chunk.document.getPDLLViewOutput(outputOS, kind);
         },
-        [&] { outputOS << "\n// -----\n\n"; });
+        [&] { outputOS << "\n"
+                       << kDefaultSplitMarker << "\n\n"; });
   }
   return result;
 }
@@ -1632,11 +1634,8 @@ void PDLTextFile::initialize(const lsp::URIForFile &uri, int64_t newVersion,
   chunks.clear();
 
   // Split the file into separate PDL documents.
-  // TODO: Find a way to share the split file marker with other tools. We don't
-  // want to use `splitAndProcessBuffer` here, but we do want to make sure this
-  // marker doesn't go out of sync.
   SmallVector<StringRef, 8> subContents;
-  StringRef(contents).split(subContents, "// -----");
+  StringRef(contents).split(subContents, kDefaultSplitMarker);
   chunks.emplace_back(std::make_unique<PDLTextFileChunk>(
       /*lineOffset=*/0, uri, subContents.front(), extraIncludeDirs,
       diagnostics));
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index 92adb8d6ac97c6..c2499d8c2e2cac 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -80,6 +80,14 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
                      "(discouraged: testing only!)"),
       llvm::cl::init(false));
 
+  static llvm::cl::opt<std::string> inputSplitMarker(
+      "input-split-marker", llvm::cl::desc("Split marker to use for the input"),
+      llvm::cl::init(kDefaultSplitMarker));
+
+  static llvm::cl::opt<std::string> outputSplitMarker(
+      "output-split-marker",
+      llvm::cl::desc("Split marker to use for the ouput"), llvm::cl::init(""));
+
   llvm::InitLLVM y(argc, argv);
 
   // Add flags for all the registered translations.
@@ -176,7 +184,8 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
   };
 
   if (failed(splitAndProcessBuffer(std::move(input), processBuffer,
-                                   output->os(), splitInputFile)))
+                                   output->os(), splitInputFile,
+                                   inputSplitMarker, outputSplitMarker)))
     return failure();
 
   output->keep();
diff --git a/mlir/test/mlir-translate/split-markers.mlir b/mlir/test/mlir-translate/split-markers.mlir
new file mode 100644
index 00000000000000..d9408ad0cea4d4
--- /dev/null
+++ b/mlir/test/mlir-translate/split-markers.mlir
@@ -0,0 +1,28 @@
+// Check that (1) the output split marker is inserted and (2) the default split
+// marker is used for the input.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-OUTPUT %s
+
+// With the second command, check that (3) the input split marker is used and
+// (4) the output split marker is empty if not specified.
+// RUN: mlir-translate %s -split-input-file -mlir-to-llvmir \
+// RUN:   -output-split-marker="; -----" \
+// RUN: | mlir-translate -split-input-file -import-llvm \
+// RUN:   -input-split-marker="; -----" \
+// RUN: | FileCheck -check-prefix=CHECK-ROUNDTRIP %s
+
+// CHECK-OUTPUT:      ModuleID
+// CHECK-OUTPUT:      ; -----
+// CHECK-OUTPUT-NEXT: ModuleID
+
+// CHECK-ROUNDTRIP:       module {{.*}} {
+// CHECK-ROUNDTRIP-NEXT:  }
+// CHECK-ROUNDTRIP-EMPTY:
+// CHECK-ROUNDTRIP:       module
+
+module {}
+
+// -----
+
+module {}

``````````

</details>


https://github.com/llvm/llvm-project/pull/84765


More information about the Mlir-commits mailing list