[Mlir-commits] [mlir] [mlir][python] auto-locs (PR #151246)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Jul 30 07:29:26 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
@llvm/pr-subscribers-mlir-core
Author: Maksim Levental (makslevental)
<details>
<summary>Changes</summary>
This PR implements "automatic" location inference in the Python bindings. When no explicit location is passed, the bindings walk the Python frame stack and collect the source location of each frame (Python records these in the frame itself). The approach is adapted from JAX's [implementation](https://github.com/jax-ml/jax/blob/523ddcfbcad005deab5a7d542df4c706f5ee5e9c/jax/_src/interpreters/mlir.py#L462) but moves the frame stack traversal into the bindings for better performance.
The system supports registering "included" and "excluded" filenames (matched as path prefixes): stack frames originating from functions in included files are **never** filtered out, and stack frames originating from functions in excluded files **are** filtered out. Inclusion is checked first, so it takes precedence over exclusion; frames from files in neither list are kept. This allows, for example, excluding all the generated `*_ops_gen.py` files.
The feature can be toggled at runtime and is off by default, so that users who have their own location-tracking systems (such as JAX) do not pay the added cost.
TODO: more tests
---
Patch is 20.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151246.diff
8 Files Affected:
- (modified) mlir/lib/Bindings/Python/Globals.h (+37)
- (modified) mlir/lib/Bindings/Python/IRCore.cpp (+121-15)
- (modified) mlir/lib/Bindings/Python/IRModule.cpp (+57-1)
- (modified) mlir/lib/Bindings/Python/IRModule.h (+2-3)
- (modified) mlir/lib/Bindings/Python/MainModule.cpp (+21-2)
- (added) mlir/test/python/ir/auto_location.py (+52)
- (added) mlir/test/python/ir/lit.local.cfg (+2)
- (modified) mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp (+1)
``````````diff
diff --git a/mlir/lib/Bindings/Python/Globals.h b/mlir/lib/Bindings/Python/Globals.h
index 826a34a535176..2071ba92f5236 100644
--- a/mlir/lib/Bindings/Python/Globals.h
+++ b/mlir/lib/Bindings/Python/Globals.h
@@ -10,15 +10,19 @@
#define MLIR_BINDINGS_PYTHON_GLOBALS_H
#include <optional>
+#include <regex>
#include <string>
+#include <unordered_set>
#include <vector>
#include "NanobindUtils.h"
#include "mlir-c/IR.h"
#include "mlir/CAPI/Support.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
namespace mlir {
namespace python {
@@ -114,6 +118,37 @@ class PyGlobals {
std::optional<nanobind::object>
lookupOperationClass(llvm::StringRef operationName);
+ class TracebackLoc {
+ public:
+ bool locTracebacksEnabled() const;
+
+ void setLocTracebacksEnabled(bool value);
+
+ size_t locTracebackFramesLimit() const;
+
+ void setLocTracebackFramesLimit(size_t value);
+
+ void registerTracebackFileInclusion(const std::string &file);
+
+ void registerTracebackFileExclusion(const std::string &file);
+
+ bool isUserTracebackFilename(llvm::StringRef file);
+
+ private:
+ nanobind::ft_mutex mutex;
+ bool locTracebackEnabled_ = false;
+ size_t locTracebackFramesLimit_ = 10;
+ std::unordered_set<std::string> userTracebackIncludeFiles;
+ std::unordered_set<std::string> userTracebackExcludeFiles;
+ std::regex userTracebackIncludeRegex;
+ bool rebuildUserTracebackIncludeRegex = false;
+ std::regex userTracebackExcludeRegex;
+ bool rebuildUserTracebackExcludeRegex = false;
+ llvm::StringMap<bool> isUserTracebackFilenameCache;
+ };
+
+ TracebackLoc &getTracebackLoc() { return tracebackLoc; }
+
private:
static PyGlobals *instance;
@@ -134,6 +169,8 @@ class PyGlobals {
/// Set of dialect namespaces that we have attempted to import implementation
/// modules for.
llvm::StringSet<> loadedDialectModules;
+
+ TracebackLoc tracebackLoc;
};
} // namespace python
diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp
index 5feed95f96f53..9d280466795c6 100644
--- a/mlir/lib/Bindings/Python/IRCore.cpp
+++ b/mlir/lib/Bindings/Python/IRCore.cpp
@@ -20,11 +20,8 @@
#include "nanobind/nanobind.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
#include <optional>
-#include <system_error>
-#include <utility>
namespace nb = nanobind;
using namespace nb::literals;
@@ -1523,7 +1520,7 @@ nb::object PyOperation::create(std::string_view name,
llvm::ArrayRef<MlirValue> operands,
std::optional<nb::dict> attributes,
std::optional<std::vector<PyBlock *>> successors,
- int regions, DefaultingPyLocation location,
+ int regions, PyLocation location,
const nb::object &maybeIp, bool inferType) {
llvm::SmallVector<MlirType, 4> mlirResults;
llvm::SmallVector<MlirBlock, 4> mlirSuccessors;
@@ -1627,7 +1624,7 @@ nb::object PyOperation::create(std::string_view name,
if (!operation.ptr)
throw nb::value_error("Operation creation failed");
PyOperationRef created =
- PyOperation::createDetached(location->getContext(), operation);
+ PyOperation::createDetached(location.getContext(), operation);
maybeInsertOperation(created, maybeIp);
return created.getObject();
@@ -1937,9 +1934,9 @@ nb::object PyOpView::buildGeneric(
std::optional<nb::list> resultTypeList, nb::list operandList,
std::optional<nb::dict> attributes,
std::optional<std::vector<PyBlock *>> successors,
- std::optional<int> regions, DefaultingPyLocation location,
+ std::optional<int> regions, PyLocation location,
const nb::object &maybeIp) {
- PyMlirContextRef context = location->getContext();
+ PyMlirContextRef context = location.getContext();
// Class level operation construction metadata.
// Operand and result segment specs are either none, which does no
@@ -2789,6 +2786,111 @@ class PyOpAttributeMap {
PyOperationRef operation;
};
+// Copied/borrowed from
+// https://github.com/python/pythoncapi-compat/blob/b541b98df1e3e5aabb5def27422a75c876f5a88a/pythoncapi_compat.h#L222
+// bpo-40421 added PyFrame_GetLasti() to Python 3.11.0b1
+#if PY_VERSION_HEX < 0x030b00b1 && !defined(PYPY_VERSION)
+int PyFrame_GetLasti(PyFrameObject *frame) {
+#if PY_VERSION_HEX >= 0x030a00a7
+ // bpo-27129: Since Python 3.10.0a7, f_lasti is an instruction offset,
+ // not a bytes offset anymore. Python uses 16-bit "wordcode" (2 bytes)
+ // instructions.
+ if (frame->f_lasti < 0) {
+ return -1;
+ }
+ return frame->f_lasti * 2;
+#else
+ return frame->f_lasti;
+#endif
+}
+#endif
+
+constexpr size_t kMaxFrames = 512;
+
+MlirLocation tracebackToLocation(MlirContext ctx) {
+ size_t framesLimit =
+ PyGlobals::get().getTracebackLoc().locTracebackFramesLimit();
+ // We use a thread_local here mostly to avoid requiring a large amount of
+ // space.
+ thread_local std::array<MlirLocation, kMaxFrames> frames;
+ size_t count = 0;
+
+ assert(PyGILState_Check());
+
+ PyThreadState *tstate = PyThreadState_GET();
+
+ PyFrameObject *next;
+ for (PyFrameObject *pyFrame = PyThreadState_GetFrame(tstate);
+ pyFrame != nullptr && count < framesLimit;
+ next = PyFrame_GetBack(pyFrame), Py_XDECREF(pyFrame), pyFrame = next) {
+ PyCodeObject *code = PyFrame_GetCode(pyFrame);
+ auto fileNameStr =
+ nb::cast<std::string>(nb::borrow<nb::str>(code->co_filename));
+ llvm::StringRef fileName(fileNameStr);
+ if (!PyGlobals::get().getTracebackLoc().isUserTracebackFilename(fileName))
+ continue;
+
+#if PY_VERSION_HEX < 0x030b00f0
+ std::string name =
+ nb::cast<std::string>(nb::borrow<nb::str>(code->co_name));
+ llvm::StringRef funcName(name);
+ int startLine = PyFrame_GetLineNumber(pyFrame);
+ MlirLocation loc =
+ mlirLocationFileLineColGet(ctx, wrap(fileName), startLine, 0);
+#else
+ // co_qualname added in py3.11
+ std::string name =
+ nb::cast<std::string>(nb::borrow<nb::str>(code->co_qualname));
+ llvm::StringRef funcName(name);
+ int startLine, startCol, endLine, endCol;
+ int lasti = PyFrame_GetLasti(pyFrame);
+ if (!PyCode_Addr2Location(code, lasti, &startLine, &startCol, &endLine,
+ &endCol)) {
+ throw nb::python_error();
+ }
+ MlirLocation loc = mlirLocationFileLineColRangeGet(
+ ctx, wrap(fileName), startLine, startCol, endLine, endCol);
+#endif
+
+ frames[count] = mlirLocationNameGet(ctx, wrap(funcName), loc);
+ ++count;
+ if (count > framesLimit)
+ break;
+ }
+
+ if (count == 0)
+ return mlirLocationUnknownGet(ctx);
+ if (count == 1)
+ return frames.front();
+
+ MlirLocation callee = frames[0];
+ MlirLocation caller = frames[count - 1];
+ for (int i = count - 2; i >= 1; i--)
+ caller = mlirLocationCallSiteGet(frames[i], caller);
+
+ return mlirLocationCallSiteGet(callee, caller);
+}
+
+PyLocation
+maybeGetTracebackLocation(const std::optional<PyLocation> &location) {
+ MlirLocation mlirLoc;
+ MlirContext mlirCtx;
+ if (!location.has_value() &&
+ PyGlobals::get().getTracebackLoc().locTracebacksEnabled()) {
+ mlirCtx = DefaultingPyMlirContext::resolve().get();
+ mlirLoc = tracebackToLocation(mlirCtx);
+ } else if (!location.has_value()) {
+ mlirLoc = DefaultingPyLocation::resolve();
+ mlirCtx = mlirLocationGetContext(mlirLoc);
+ } else {
+ mlirLoc = *location;
+ mlirCtx = mlirLocationGetContext(mlirLoc);
+ }
+ assert(!mlirLocationIsNull(mlirLoc) && "expected non-null mlirLoc");
+ PyMlirContextRef ctx = PyMlirContext::forContext(mlirCtx);
+ return {ctx, mlirLoc};
+}
+
} // namespace
//------------------------------------------------------------------------------
@@ -3240,8 +3342,9 @@ void mlir::python::populateIRCore(nb::module_ &m) {
kModuleParseDocstring)
.def_static(
"create",
- [](DefaultingPyLocation loc) {
- MlirModule module = mlirModuleCreateEmpty(loc);
+ [](std::optional<PyLocation> loc) {
+ PyLocation pyLoc = maybeGetTracebackLocation(loc);
+ MlirModule module = mlirModuleCreateEmpty(pyLoc.get());
return PyModule::forModule(module).releaseObject();
},
nb::arg("loc").none() = nb::none(), "Creates an empty module")
@@ -3454,7 +3557,7 @@ void mlir::python::populateIRCore(nb::module_ &m) {
std::optional<std::vector<PyValue *>> operands,
std::optional<nb::dict> attributes,
std::optional<std::vector<PyBlock *>> successors, int regions,
- DefaultingPyLocation location, const nb::object &maybeIp,
+ std::optional<PyLocation> location, const nb::object &maybeIp,
bool inferType) {
// Unpack/validate operands.
llvm::SmallVector<MlirValue, 4> mlirOperands;
@@ -3467,8 +3570,9 @@ void mlir::python::populateIRCore(nb::module_ &m) {
}
}
+ PyLocation pyLoc = maybeGetTracebackLocation(location);
return PyOperation::create(name, results, mlirOperands, attributes,
- successors, regions, location, maybeIp,
+ successors, regions, pyLoc, maybeIp,
inferType);
},
nb::arg("name"), nb::arg("results").none() = nb::none(),
@@ -3512,12 +3616,13 @@ void mlir::python::populateIRCore(nb::module_ &m) {
std::optional<nb::list> resultTypeList, nb::list operandList,
std::optional<nb::dict> attributes,
std::optional<std::vector<PyBlock *>> successors,
- std::optional<int> regions, DefaultingPyLocation location,
+ std::optional<int> regions, std::optional<PyLocation> location,
const nb::object &maybeIp) {
+ PyLocation pyLoc = maybeGetTracebackLocation(location);
new (self) PyOpView(PyOpView::buildGeneric(
name, opRegionSpec, operandSegmentSpecObj,
resultSegmentSpecObj, resultTypeList, operandList,
- attributes, successors, regions, location, maybeIp));
+ attributes, successors, regions, pyLoc, maybeIp));
},
nb::arg("name"), nb::arg("opRegionSpec"),
nb::arg("operandSegmentSpecObj").none() = nb::none(),
@@ -3551,17 +3656,18 @@ void mlir::python::populateIRCore(nb::module_ &m) {
[](nb::handle cls, std::optional<nb::list> resultTypeList,
nb::list operandList, std::optional<nb::dict> attributes,
std::optional<std::vector<PyBlock *>> successors,
- std::optional<int> regions, DefaultingPyLocation location,
+ std::optional<int> regions, std::optional<PyLocation> location,
const nb::object &maybeIp) {
std::string name = nb::cast<std::string>(cls.attr("OPERATION_NAME"));
std::tuple<int, bool> opRegionSpec =
nb::cast<std::tuple<int, bool>>(cls.attr("_ODS_REGIONS"));
nb::object operandSegmentSpec = cls.attr("_ODS_OPERAND_SEGMENTS");
nb::object resultSegmentSpec = cls.attr("_ODS_RESULT_SEGMENTS");
+ PyLocation pyLoc = maybeGetTracebackLocation(location);
return PyOpView::buildGeneric(name, opRegionSpec, operandSegmentSpec,
resultSegmentSpec, resultTypeList,
operandList, attributes, successors,
- regions, location, maybeIp);
+ regions, pyLoc, maybeIp);
},
nb::arg("cls"), nb::arg("results").none() = nb::none(),
nb::arg("operands").none() = nb::none(),
diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp
index e600f1bbd4493..bc172bd0cb722 100644
--- a/mlir/lib/Bindings/Python/IRModule.cpp
+++ b/mlir/lib/Bindings/Python/IRModule.cpp
@@ -13,9 +13,9 @@
#include "Globals.h"
#include "NanobindUtils.h"
+#include "mlir-c/Bindings/Python/Interop.h" // This is expected after nanobind.
#include "mlir-c/Support.h"
#include "mlir/Bindings/Python/Nanobind.h"
-#include "mlir-c/Bindings/Python/Interop.h" // This is expected after nanobind.
namespace nb = nanobind;
using namespace mlir;
@@ -197,3 +197,59 @@ PyGlobals::lookupOperationClass(llvm::StringRef operationName) {
// Not found and loading did not yield a registration.
return std::nullopt;
}
+
+bool PyGlobals::TracebackLoc::locTracebacksEnabled() const {
+ return locTracebackEnabled_;
+}
+
+void PyGlobals::TracebackLoc::setLocTracebacksEnabled(bool value) {
+ nanobind::ft_lock_guard lock(mutex);
+ locTracebackEnabled_ = value;
+}
+
+size_t PyGlobals::TracebackLoc::locTracebackFramesLimit() const {
+ return locTracebackFramesLimit_;
+}
+
+void PyGlobals::TracebackLoc::setLocTracebackFramesLimit(size_t value) {
+ nanobind::ft_lock_guard lock(mutex);
+ locTracebackFramesLimit_ = value;
+}
+
+void PyGlobals::TracebackLoc::registerTracebackFileInclusion(
+ const std::string &file) {
+ nanobind::ft_lock_guard lock(mutex);
+ userTracebackIncludeFiles.insert("^" + llvm::Regex::escape(file));
+ rebuildUserTracebackIncludeRegex = true;
+}
+
+void PyGlobals::TracebackLoc::registerTracebackFileExclusion(
+ const std::string &file) {
+ nanobind::ft_lock_guard lock(mutex);
+ userTracebackExcludeFiles.insert("^" + llvm::Regex::escape(file));
+ rebuildUserTracebackExcludeRegex = true;
+}
+
+bool PyGlobals::TracebackLoc::isUserTracebackFilename(
+ const llvm::StringRef file) {
+ nanobind::ft_lock_guard lock(mutex);
+ if (rebuildUserTracebackIncludeRegex) {
+ userTracebackIncludeRegex.assign(
+ llvm::join(userTracebackIncludeFiles, "|"));
+ rebuildUserTracebackIncludeRegex = false;
+ isUserTracebackFilenameCache.clear();
+ }
+ if (rebuildUserTracebackExcludeRegex) {
+ userTracebackExcludeRegex.assign(
+ llvm::join(userTracebackExcludeFiles, "|"));
+ rebuildUserTracebackExcludeRegex = false;
+ isUserTracebackFilenameCache.clear();
+ }
+ if (!isUserTracebackFilenameCache.contains(file)) {
+ std::string fileStr = file.str();
+ bool include = std::regex_search(fileStr, userTracebackIncludeRegex);
+ bool exclude = std::regex_search(fileStr, userTracebackExcludeRegex);
+ isUserTracebackFilenameCache[file] = include || !exclude;
+ }
+ return isUserTracebackFilenameCache[file];
+}
diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h
index 9c22dea157c06..87e1a0b12da00 100644
--- a/mlir/lib/Bindings/Python/IRModule.h
+++ b/mlir/lib/Bindings/Python/IRModule.h
@@ -722,8 +722,7 @@ class PyOperation : public PyOperationBase, public BaseContextObject {
llvm::ArrayRef<MlirValue> operands,
std::optional<nanobind::dict> attributes,
std::optional<std::vector<PyBlock *>> successors, int regions,
- DefaultingPyLocation location, const nanobind::object &ip,
- bool inferType);
+ PyLocation location, const nanobind::object &ip, bool inferType);
/// Creates an OpView suitable for this operation.
nanobind::object createOpView();
@@ -781,7 +780,7 @@ class PyOpView : public PyOperationBase {
nanobind::list operandList,
std::optional<nanobind::dict> attributes,
std::optional<std::vector<PyBlock *>> successors,
- std::optional<int> regions, DefaultingPyLocation location,
+ std::optional<int> regions, PyLocation location,
const nanobind::object &maybeIp);
/// Construct an instance of a class deriving from OpView, bypassing its
diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp
index 6f49431006605..278847e7ac7f5 100644
--- a/mlir/lib/Bindings/Python/MainModule.cpp
+++ b/mlir/lib/Bindings/Python/MainModule.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "Globals.h"
#include "IRModule.h"
#include "NanobindUtils.h"
@@ -44,7 +43,27 @@ NB_MODULE(_mlir, m) {
.def("_register_operation_impl", &PyGlobals::registerOperationImpl,
"operation_name"_a, "operation_class"_a, nb::kw_only(),
"replace"_a = false,
- "Testing hook for directly registering an operation");
+ "Testing hook for directly registering an operation")
+ .def("loc_tracebacks_enabled",
+ [](PyGlobals &self) {
+ return self.getTracebackLoc().locTracebacksEnabled();
+ })
+ .def("set_loc_tracebacks_enabled",
+ [](PyGlobals &self, bool enabled) {
+ self.getTracebackLoc().setLocTracebacksEnabled(enabled);
+ })
+ .def("set_loc_tracebacks_frame_limit",
+ [](PyGlobals &self, int n) {
+ self.getTracebackLoc().setLocTracebackFramesLimit(n);
+ })
+ .def("register_traceback_file_inclusion",
+ [](PyGlobals &self, const std::string &filename) {
+ self.getTracebackLoc().registerTracebackFileInclusion(filename);
+ })
+ .def("register_traceback_file_exclusion",
+ [](PyGlobals &self, const std::string &filename) {
+ self.getTracebackLoc().registerTracebackFileExclusion(filename);
+ });
// Aside from making the globals accessible to python, having python manage
// it is necessary to make sure it is destroyed (and releases its python
diff --git a/mlir/test/python/ir/auto_location.py b/mlir/test/python/ir/auto_location.py
new file mode 100644
index 0000000000000..10a63d5f0f89c
--- /dev/null
+++ b/mlir/test/python/ir/auto_location.py
@@ -0,0 +1,52 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import gc
+from contextlib import contextmanager
+
+from mlir.ir import *
+from mlir.dialects._ods_common import _cext
+from mlir.dialects import arith, _arith_ops_gen
+
+
+def run(f):
+ print("\nTEST:", f.__name__)
+ f()
+ gc.collect()
+ assert Context._get_live_count() == 0
+
+
+@contextmanager
+def with_infer_location():
+ _cext.globals.set_loc_tracebacks_enabled(True)
+ yield
+ _cext.globals.set_loc_tracebacks_enabled(False)
+
+
+# CHECK-LABEL: TEST: testInferLocations
+def testInferLocations():
+ with Context() as ctx, Location.unknown(), with_infer_location():
+ ctx.allow_unregistered_dialects = True
+ op = Operation.create("custom.op1")
+ one = arith.constant(IndexType.get(), 1)
+ _cext.globals.register_traceback_file_exclusion(arith.__file__)
+ two = arith.constant(IndexType.get(), 2)
+
+ # CHECK: loc(callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":29:13 to :43)
+ # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":13:4 to :7)
+ # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":52:0 to :23))))
+ print(op.location)
+
+ # CHECK: loc(callsite("ConstantOp.__init__"("{{.*}}/mlir/dialects/arith.py":65:12 to :76)
+ # CHECK-SAME: at callsite("constant"("{{.*}}/mlir/dialects/arith.py":110:40 to :81)
+ # CHECK-SAME: at callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":30:14 to :48)
+ # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":13:4 to :7)
+ # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":52:0 to :23))))))
+ print(one.location)
+
+ # CHECK: loc(callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":32:14 to :48)
+ # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":13:4 to :7)
+ # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":52:0 to :23))))
+ print(two.location)
+
+
+run(testInferLocations)
diff --git a/mlir/test/python/ir/lit.local.cfg b/mlir/test/python/ir/lit.local.cfg
new file mode 100644
index 0000000000000..f59e80bc93ab8
--- /dev/null
+++ b/mlir/test/python/ir/lit.local.cfg
@@ -0,0 +1,2 @@
+if "Windows" in config.host_os:
+ confi...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/151246
More information about the Mlir-commits
mailing list