[Mlir-commits] [mlir] [mlir] Report line number from file rather than split (PR #150982)
Jacques Pienaar
llvmlistbot at llvm.org
Mon Jul 28 20:04:33 PDT 2025
https://github.com/jpienaar updated https://github.com/llvm/llvm-project/pull/150982
>From 49ba6a55be3fe5558f55c9843a1775a02b90d4b6 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jacques+gh at japienaar.info>
Date: Mon, 28 Jul 2025 17:53:13 +0000
Subject: [PATCH] [mlir] Report line number from file rather than split
Add convention for lexer if the last file is contained in the first,
then the first is used for error reporting. This requires that these two
overlap to make it easy to find the corresponding spots. Enables going
from
```
within split at mlir/test/IR/invalid.mlir::10 offset :6:9: error: reference to an undefined block
```
to
```
mlir/test/IR/invalid.mlir:15:9: error: reference to an undefined block
```
This does change the split to not produce always null terminated buffers and
tools that need it to do so themselves (which is mostly by copying - this may
have no actual impact as previously this was a copy too). Alternatively I could
copy both and then split buffers are always null-terminated, but that seems
worse.
---
mlir/include/mlir/IR/Diagnostics.h | 4 ++
mlir/include/mlir/Support/ToolUtilities.h | 15 +++++
mlir/lib/IR/Diagnostics.cpp | 21 ++++---
mlir/lib/Support/ToolUtilities.cpp | 39 ++++++++++---
mlir/lib/Tools/mlir-opt/MlirOptMain.cpp | 55 +++++++++++++------
.../mlir-translate/MlirTranslateMain.cpp | 7 +++
mlir/test/IR/diagnostic-nosplit.mlir | 13 +++++
mlir/test/IR/top-level.mlir | 4 +-
mlir/tools/mlir-pdll/mlir-pdll.cpp | 6 ++
9 files changed, 127 insertions(+), 37 deletions(-)
create mode 100644 mlir/test/IR/diagnostic-nosplit.mlir
diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h
index 4ed0423902138..7ff718ad7f241 100644
--- a/mlir/include/mlir/IR/Diagnostics.h
+++ b/mlir/include/mlir/IR/Diagnostics.h
@@ -639,6 +639,10 @@ class SourceMgrDiagnosticVerifierHandler : public SourceMgrDiagnosticHandler {
/// verified correctly, failure otherwise.
LogicalResult verify();
+ /// Register this handler with the given context. This is intended for use
+ /// with the splitAndProcessBuffer function.
+ void registerInContext(MLIRContext *ctx);
+
private:
/// Process a single diagnostic.
void process(Diagnostic &diag);
diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h
index cb6ba299d28f1..657f117d8d8b5 100644
--- a/mlir/include/mlir/Support/ToolUtilities.h
+++ b/mlir/include/mlir/Support/ToolUtilities.h
@@ -21,10 +21,16 @@
namespace llvm {
class MemoryBuffer;
+class MemoryBufferRef;
} // namespace llvm
namespace mlir {
+// A function that processes a chunk of a buffer and writes the result to an
+// output stream.
using ChunkBufferHandler = function_ref<LogicalResult(
+ std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
+ const llvm::MemoryBufferRef &sourceBuffer, raw_ostream &os)>;
+using NoSourceChunkBufferHandler = function_ref<LogicalResult(
std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
extern inline const char *const kDefaultSplitMarker = "// -----";
@@ -45,6 +51,15 @@ splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer, raw_ostream &os,
llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
llvm::StringRef outputSplitMarker = "");
+
+/// Same as above, but for case where the original buffer is not used while
+/// processing the chunk.
+LogicalResult
+splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
+ NoSourceChunkBufferHandler processChunkBuffer,
+ raw_ostream &os,
+ llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
+ llvm::StringRef outputSplitMarker = "");
} // namespace mlir
#endif // MLIR_SUPPORT_TOOLUTILITIES_H
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index 3e337951bcd3f..776b5c6588c71 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -821,15 +821,7 @@ SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler(
for (unsigned i = 0, e = mgr.getNumBuffers(); i != e; ++i)
(void)impl->computeExpectedDiags(out, mgr, mgr.getMemoryBuffer(i + 1));
- // Register a handler to verify the diagnostics.
- setHandler([&](Diagnostic &diag) {
- // Process the main diagnostics.
- process(diag);
-
- // Process each of the notes.
- for (auto ¬e : diag.getNotes())
- process(note);
- });
+ registerInContext(ctx);
}
SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler(
@@ -862,6 +854,17 @@ LogicalResult SourceMgrDiagnosticVerifierHandler::verify() {
return impl->status;
}
+void SourceMgrDiagnosticVerifierHandler::registerInContext(MLIRContext *ctx) {
+ ctx->getDiagEngine().registerHandler([&](Diagnostic &diag) {
+ // Process the main diagnostics.
+ process(diag);
+
+ // Process each of the notes.
+ for (auto ¬e : diag.getNotes())
+ process(note);
+ });
+}
+
/// Process a single diagnostic.
void SourceMgrDiagnosticVerifierHandler::process(Diagnostic &diag) {
return process(diag.getLocation(), diag.str(), diag.getSeverity());
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index 748f92847ac58..2cf33eb30d06a 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -14,6 +14,8 @@
#include "mlir/Support/LLVM.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <utility>
using namespace mlir;
@@ -22,18 +24,18 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer,
raw_ostream &os, llvm::StringRef inputSplitMarker,
llvm::StringRef outputSplitMarker) {
+ llvm::MemoryBufferRef originalBufferRef = originalBuffer->getMemBufferRef();
// If splitting is disabled, we process the full input buffer.
if (inputSplitMarker.empty())
- return processChunkBuffer(std::move(originalBuffer), os);
+ return processChunkBuffer(std::move(originalBuffer), originalBufferRef, os);
const int inputSplitMarkerLen = inputSplitMarker.size();
- auto *origMemBuffer = originalBuffer.get();
SmallVector<StringRef, 8> rawSourceBuffers;
const int checkLen = 2;
// Split dropping the last checkLen chars to enable flagging near misses.
- origMemBuffer->getBuffer().split(rawSourceBuffers,
- inputSplitMarker.drop_back(checkLen));
+ originalBufferRef.getBuffer().split(rawSourceBuffers,
+ inputSplitMarker.drop_back(checkLen));
if (rawSourceBuffers.empty())
return success();
@@ -79,11 +81,17 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
auto interleaveFn = [&](StringRef subBuffer) {
auto splitLoc = SMLoc::getFromPointer(subBuffer.data());
unsigned splitLine = fileSourceMgr.getLineAndColumn(splitLoc).first;
- auto subMemBuffer = llvm::MemoryBuffer::getMemBufferCopy(
- subBuffer, Twine("within split at ") +
- origMemBuffer->getBufferIdentifier() + ":" +
- Twine(splitLine) + " offset ");
- if (failed(processChunkBuffer(std::move(subMemBuffer), os)))
+ std::string name((Twine("within split at ") +
+ originalBufferRef.getBufferIdentifier() + ":" +
+ Twine(splitLine) + " offset ")
+ .str());
+ // Use MemoryBufferRef to avoid copying the buffer & keep at same location
+ // relative to the original buffer.
+ auto subMemBuffer =
+ llvm::MemoryBuffer::getMemBuffer(llvm::MemoryBufferRef(subBuffer, name),
+ /*RequiresNullTerminator=*/false);
+ if (failed(
+ processChunkBuffer(std::move(subMemBuffer), originalBufferRef, os)))
hadFailure = true;
};
llvm::interleave(sourceBuffers, os, interleaveFn,
@@ -92,3 +100,16 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
// If any fails, then return a failure of the tool.
return failure(hadFailure);
}
+
+LogicalResult
+mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
+ NoSourceChunkBufferHandler processChunkBuffer,
+ raw_ostream &os, llvm::StringRef inputSplitMarker,
+ llvm::StringRef outputSplitMarker) {
+ auto process = [&](std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
+ const llvm::MemoryBufferRef &, raw_ostream &os) {
+ return processChunkBuffer(std::move(chunkBuffer), os);
+ };
+ return splitAndProcessBuffer(std::move(originalBuffer), process, os,
+ inputSplitMarker, outputSplitMarker);
+}
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index 8f785900780f5..bdcdaa407e616 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -508,13 +508,20 @@ performActions(raw_ostream &os,
/// Parses the memory buffer. If successfully, run a series of passes against
/// it and print the result.
-static LogicalResult processBuffer(raw_ostream &os,
- std::unique_ptr<MemoryBuffer> ownedBuffer,
- const MlirOptMainConfig &config,
- DialectRegistry ®istry,
- llvm::ThreadPoolInterface *threadPool) {
+static LogicalResult
+processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
+ llvm::MemoryBufferRef sourceBuffer,
+ const MlirOptMainConfig &config, DialectRegistry ®istry,
+ SourceMgrDiagnosticVerifierHandler *verifyHandler,
+ llvm::ThreadPoolInterface *threadPool) {
// Tell sourceMgr about this buffer, which is what the parser will pick up.
auto sourceMgr = std::make_shared<SourceMgr>();
+ // Add the original buffer to the source manager to use for determining
+ // locations.
+ sourceMgr->AddNewSourceBuffer(
+ llvm::MemoryBuffer::getMemBuffer(sourceBuffer,
+ /*RequiresNullTerminator=*/false),
+ SMLoc());
sourceMgr->AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());
// Create a context just for the current buffer. Disable threading on creation
@@ -522,6 +529,8 @@ static LogicalResult processBuffer(raw_ostream &os,
MLIRContext context(registry, MLIRContext::Threading::DISABLED);
if (threadPool)
context.setThreadPool(*threadPool);
+ if (verifyHandler)
+ verifyHandler->registerInContext(&context);
StringRef irdlFile = config.getIrdlFile();
if (!irdlFile.empty() && failed(loadIRDLDialects(irdlFile, context)))
@@ -545,17 +554,12 @@ static LogicalResult processBuffer(raw_ostream &os,
return performActions(os, sourceMgr, &context, config);
}
- SourceMgrDiagnosticVerifierHandler sourceMgrHandler(
- *sourceMgr, &context, config.verifyDiagnosticsLevel());
-
// Do any processing requested by command line flags. We don't care whether
// these actions succeed or fail, we only care what diagnostics they produce
// and whether they match our expectations.
(void)performActions(os, sourceMgr, &context, config);
- // Verify the diagnostic handler to make sure that each of the diagnostics
- // matched.
- return sourceMgrHandler.verify();
+ return success();
}
std::pair<std::string, std::string>
@@ -624,14 +628,31 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
if (threadPoolCtx.isMultithreadingEnabled())
threadPool = &threadPoolCtx.getThreadPool();
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(
+ llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(),
+ /*RequiresNullTerminator=*/false),
+ SMLoc());
+ // Note: this creates a verifier handler independent of the the flag set, as
+ // internally if the flag is not set, a new scoped diagnostic handler is
+ // created which would intercept the diagnostics and verify them.
+ SourceMgrDiagnosticVerifierHandler sourceMgrHandler(
+ sourceMgr, &threadPoolCtx, config.verifyDiagnosticsLevel());
auto chunkFn = [&](std::unique_ptr<MemoryBuffer> chunkBuffer,
- raw_ostream &os) {
- return processBuffer(os, std::move(chunkBuffer), config, registry,
- threadPool);
+ llvm::MemoryBufferRef sourceBuffer, raw_ostream &os) {
+ return processBuffer(
+ os, std::move(chunkBuffer), sourceBuffer, config, registry,
+ config.shouldVerifyDiagnostics() ? &sourceMgrHandler : nullptr,
+ threadPool);
};
- return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
- config.inputSplitMarker(),
- config.outputSplitMarker());
+ LogicalResult status = splitAndProcessBuffer(
+ llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(),
+ /*RequiresNullTerminator=*/false),
+ chunkFn, outputStream, config.inputSplitMarker(),
+ config.outputSplitMarker());
+ if (config.shouldVerifyDiagnostics() && failed(sourceMgrHandler.verify()))
+ status = failure();
+ return status;
}
LogicalResult mlir::MlirOptMain(int argc, char **argv,
diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
index c11cb8d2104fb..e1c8afb7c3f2f 100644
--- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
+++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp
@@ -135,6 +135,13 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
// Processes the memory buffer with a new MLIRContext.
auto processBuffer = [&](std::unique_ptr<llvm::MemoryBuffer> ownedBuffer,
raw_ostream &os) {
+ // Many of the translations expect a null-terminated buffer while splitting
+ // the buffer does not guarantee null-termination. Make a copy of the buffer
+ // to ensure null-termination.
+ if (!ownedBuffer->getBuffer().ends_with('\0')) {
+ ownedBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+ ownedBuffer->getBuffer(), ownedBuffer->getBufferIdentifier());
+ }
// Temporary buffers for chained translation processing.
std::string dataIn;
std::string dataOut;
diff --git a/mlir/test/IR/diagnostic-nosplit.mlir b/mlir/test/IR/diagnostic-nosplit.mlir
new file mode 100644
index 0000000000000..ecfb9c62469b5
--- /dev/null
+++ b/mlir/test/IR/diagnostic-nosplit.mlir
@@ -0,0 +1,13 @@
+// RUN: not mlir-opt %s -o - --split-input-file 2>&1 | FileCheck %s
+// This test verifies that diagnostic handler doesn't emit splits.
+
+
+// -----
+
+
+
+func.func @constant_out_of_range() {
+ // CHECK: mlir:11:8: error: 'arith.constant'
+ %x = "arith.constant"() {value = 100} : () -> i1
+ return
+}
diff --git a/mlir/test/IR/top-level.mlir b/mlir/test/IR/top-level.mlir
index b571d944928c8..e0adb4d823344 100644
--- a/mlir/test/IR/top-level.mlir
+++ b/mlir/test/IR/top-level.mlir
@@ -6,10 +6,10 @@ func.func private @foo()
// -----
-// expected-error at -3 {{source must contain a single top-level operation, found: 2}}
+// expected-error at -9 {{source must contain a single top-level operation, found: 2}}
func.func private @bar()
func.func private @baz()
// -----
-// expected-error at -3 {{source must contain a single top-level operation, found: 0}}
+// expected-error at -15 {{source must contain a single top-level operation, found: 0}}
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index 88a5f3639c962..f99dcdb53fe97 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -201,6 +201,12 @@ int main(int argc, char **argv) {
llvm::raw_string_ostream outputStrOS(outputStr);
auto processFn = [&](std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
raw_ostream &os) {
+ // Split does not guarantee null-termination. Make a copy of the buffer to
+ // ensure null-termination.
+ if (!chunkBuffer->getBuffer().ends_with('\0')) {
+ chunkBuffer = llvm::MemoryBuffer::getMemBufferCopy(
+ chunkBuffer->getBuffer(), chunkBuffer->getBufferIdentifier());
+ }
return processBuffer(os, std::move(chunkBuffer), outputType, includeDirs,
dumpODS, includedFiles);
};
More information about the Mlir-commits
mailing list