[Mlir-commits] [mlir] [mlir][python] auto-locs (PR #151246)

Maksim Levental llvmlistbot at llvm.org
Tue Jul 29 22:58:38 PDT 2025


https://github.com/makslevental updated https://github.com/llvm/llvm-project/pull/151246

>From 82847687d12bf6666dc93f0ba1a36976dd6c55cc Mon Sep 17 00:00:00 2001
From: max <maksim.levental at gmail.com>
Date: Tue, 29 Jul 2025 18:21:53 -0400
Subject: [PATCH] [mlir][python] auto-locs

---
 mlir/lib/Bindings/Python/Globals.h      |  69 +++++++++++
 mlir/lib/Bindings/Python/IRCore.cpp     | 156 +++++++++++++++++++++---
 mlir/lib/Bindings/Python/IRModule.h     |   5 +-
 mlir/lib/Bindings/Python/MainModule.cpp |  11 +-
 mlir/test/python/ir/auto_location.py    |  54 ++++++++
 mlir/test/python/ir/lit.local.cfg       |   2 +
 6 files changed, 276 insertions(+), 21 deletions(-)
 create mode 100644 mlir/test/python/ir/auto_location.py
 create mode 100644 mlir/test/python/ir/lit.local.cfg

diff --git a/mlir/lib/Bindings/Python/Globals.h b/mlir/lib/Bindings/Python/Globals.h
index 826a34a535176..bee48f077c438 100644
--- a/mlir/lib/Bindings/Python/Globals.h
+++ b/mlir/lib/Bindings/Python/Globals.h
@@ -10,15 +10,19 @@
 #define MLIR_BINDINGS_PYTHON_GLOBALS_H
 
 #include <optional>
+#include <regex>
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "NanobindUtils.h"
 #include "mlir-c/IR.h"
 #include "mlir/CAPI/Support.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Regex.h"
 
 namespace mlir {
 namespace python {
@@ -114,11 +118,101 @@ class PyGlobals {
   std::optional<nanobind::object>
   lookupOperationClass(llvm::StringRef operationName);
 
+  /// Returns whether op/module creation without an explicit location should
+  /// derive one from the Python traceback.
+  bool locTracebacksEnabled() {
+    nanobind::ft_lock_guard lock(mutex);
+    return locTracebackEnabled_;
+  }
+
+  /// Turns traceback-derived locations on or off.
+  void setLocTracebacksEnabled(const bool value) {
+    nanobind::ft_lock_guard lock(mutex);
+    locTracebackEnabled_ = value;
+  }
+
+  /// Returns the maximum number of frames recorded per traceback location.
+  size_t locTracebackFramesLimit() {
+    nanobind::ft_lock_guard lock(mutex);
+    return locTracebackFramesLimit_;
+  }
+
+  /// Sets the maximum number of frames recorded per traceback location.
+  void setLocTracebackFramesLimit(const size_t value) {
+    nanobind::ft_lock_guard lock(mutex);
+    locTracebackFramesLimit_ = value;
+  }
+
+  /// Registers an include pattern. `file` is regex-escaped and anchored with
+  /// `^`, so it matches every filename that starts with `file`.
+  void registerTracebackFileInclusion(const std::string &file) {
+    nanobind::ft_lock_guard lock(mutex);
+    userTracebackIncludeFiles.insert("^" + llvm::Regex::escape(file));
+    rebuildUserTracebackIncludeRegex = true;
+  }
+
+  /// Registers an exclude pattern. `file` is regex-escaped and anchored with
+  /// `^`, so it matches every filename that starts with `file`.
+  void registerTracebackFileExclusion(const std::string &file) {
+    nanobind::ft_lock_guard lock(mutex);
+    userTracebackExcludeFiles.insert("^" + llvm::Regex::escape(file));
+    rebuildUserTracebackExcludeRegex = true;
+  }
+
+  /// Returns true if frames from `file` should appear in traceback locations:
+  /// the name matches a registered include pattern or matches no registered
+  /// exclude pattern. Verdicts are cached per filename until the pattern sets
+  /// change.
+  bool isUserTracebackFilename(const llvm::StringRef file) {
+    nanobind::ft_lock_guard lock(mutex);
+    // Patterns are compiled lazily; recompiling invalidates cached verdicts.
+    if (rebuildUserTracebackIncludeRegex) {
+      userTracebackIncludeRegex.assign(
+          llvm::join(userTracebackIncludeFiles, "|"));
+      rebuildUserTracebackIncludeRegex = false;
+      isUserTracebackFilenameCache.clear();
+    }
+    if (rebuildUserTracebackExcludeRegex) {
+      userTracebackExcludeRegex.assign(
+          llvm::join(userTracebackExcludeFiles, "|"));
+      rebuildUserTracebackExcludeRegex = false;
+      isUserTracebackFilenameCache.clear();
+    }
+    // Cache hit: a single lookup instead of contains() plus operator[].
+    auto cached = isUserTracebackFilenameCache.find(file);
+    if (cached != isUserTracebackFilenameCache.end())
+      return cached->second;
+    // Search the StringRef's bytes directly; no std::string copy is needed.
+    bool include =
+        std::regex_search(file.begin(), file.end(), userTracebackIncludeRegex);
+    bool exclude =
+        std::regex_search(file.begin(), file.end(), userTracebackExcludeRegex);
+    bool isUserFile = include || !exclude;
+    isUserTracebackFilenameCache[file] = isUserFile;
+    return isUserFile;
+  }
+
 private:
   static PyGlobals *instance;
 
   nanobind::ft_mutex mutex;
 
+  /// Whether missing locations are derived from the Python traceback.
+  bool locTracebackEnabled_ = false;
+  /// Maximum number of frames recorded per traceback location.
+  size_t locTracebackFramesLimit_ = 10;
+  /// Anchored, escaped filename patterns registered for inclusion/exclusion.
+  std::unordered_set<std::string> userTracebackIncludeFiles;
+  std::unordered_set<std::string> userTracebackExcludeFiles;
+  /// Alternations compiled from the sets above; rebuilt lazily when the
+  /// matching `rebuild...` flag is set.
+  std::regex userTracebackIncludeRegex;
+  bool rebuildUserTracebackIncludeRegex = false;
+  std::regex userTracebackExcludeRegex;
+  bool rebuildUserTracebackExcludeRegex = false;
+  /// Cached isUserTracebackFilename verdicts, keyed by filename.
+  llvm::StringMap<bool> isUserTracebackFilenameCache;
+
   /// Module name prefixes to search under for dialect implementation modules.
   std::vector<std::string> dialectSearchPrefixes;
   /// Map of dialect namespace to external dialect class object.
diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp
index 5feed95f96f53..10798cd5f30ea 100644
--- a/mlir/lib/Bindings/Python/IRCore.cpp
+++ b/mlir/lib/Bindings/Python/IRCore.cpp
@@ -20,11 +20,8 @@
 #include "nanobind/nanobind.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/raw_ostream.h"
 
 #include <optional>
-#include <system_error>
-#include <utility>
 
 namespace nb = nanobind;
 using namespace nb::literals;
@@ -1523,7 +1520,7 @@ nb::object PyOperation::create(std::string_view name,
                                llvm::ArrayRef<MlirValue> operands,
                                std::optional<nb::dict> attributes,
                                std::optional<std::vector<PyBlock *>> successors,
-                               int regions, DefaultingPyLocation location,
+                               int regions, PyLocation location,
                                const nb::object &maybeIp, bool inferType) {
   llvm::SmallVector<MlirType, 4> mlirResults;
   llvm::SmallVector<MlirBlock, 4> mlirSuccessors;
@@ -1627,7 +1624,7 @@ nb::object PyOperation::create(std::string_view name,
   if (!operation.ptr)
     throw nb::value_error("Operation creation failed");
   PyOperationRef created =
-      PyOperation::createDetached(location->getContext(), operation);
+      PyOperation::createDetached(location.getContext(), operation);
   maybeInsertOperation(created, maybeIp);
 
   return created.getObject();
@@ -1937,9 +1934,9 @@ nb::object PyOpView::buildGeneric(
     std::optional<nb::list> resultTypeList, nb::list operandList,
     std::optional<nb::dict> attributes,
     std::optional<std::vector<PyBlock *>> successors,
-    std::optional<int> regions, DefaultingPyLocation location,
+    std::optional<int> regions, PyLocation location,
     const nb::object &maybeIp) {
-  PyMlirContextRef context = location->getContext();
+  PyMlirContextRef context = location.getContext();
 
   // Class level operation construction metadata.
   // Operand and result segment specs are either none, which does no
@@ -2789,6 +2786,115 @@ class PyOpAttributeMap {
   PyOperationRef operation;
 };
 
+// Compatibility shim: bpo-40421 added PyFrame_GetLasti() to Python 3.11.0b1,
+// so older CPython builds get an equivalent defined here.
+// NOTE(review): the only caller visible in this patch is compiled for
+// Python >= 3.11, where this shim is compiled out -- confirm it is needed.
+#if PY_VERSION_HEX < 0x030b00b1 && !defined(PYPY_VERSION)
+int PyFrame_GetLasti(PyFrameObject *frame) {
+#if PY_VERSION_HEX >= 0x030a00a7
+  // bpo-27129: Since Python 3.10.0a7, f_lasti is an instruction offset,
+  // not a bytes offset anymore. Python uses 16-bit "wordcode" (2 bytes)
+  // instructions.
+  if (frame->f_lasti < 0) {
+    return -1;
+  }
+  return frame->f_lasti * 2;
+#else
+  return frame->f_lasti;
+#endif
+}
+#endif
+
+/// Capacity of the per-thread frame buffer in tracebackToLocation; the
+/// configured frame limit is clamped to it.
+constexpr size_t kMaxFrames = 512;
+
+/// Builds an MLIR location in `ctx` from the current Python call stack.
+///
+/// Walks the calling thread's frames starting at the current one, skips
+/// frames whose filename PyGlobals::isUserTracebackFilename rejects, and
+/// records at most min(configured frame limit, kMaxFrames) of them. Each
+/// recorded frame becomes a name location (the function name) wrapping a
+/// file location. The result is an unknown location if nothing was recorded,
+/// the lone frame location if one frame was, and otherwise nested call-site
+/// locations in which each frame is the callee of the frames outside it.
+///
+/// Asserts that the GIL is held. Throws nb::python_error when
+/// PyCode_Addr2Location reports failure.
+MlirLocation tracebackToLocation(MlirContext ctx) {
+  // `frames` below has a fixed capacity, so clamp the user-configurable limit
+  // to it instead of writing past the end of the array.
+  size_t framesLimit = PyGlobals::get().locTracebackFramesLimit();
+  if (framesLimit > kMaxFrames)
+    framesLimit = kMaxFrames;
+  // We use a thread_local here mostly to avoid requiring a large amount of
+  // space.
+  thread_local std::array<MlirLocation, kMaxFrames> frames;
+  size_t count = 0;
+
+  assert(PyGILState_Check());
+
+  PyThreadState *tstate = PyThreadState_GET();
+
+  // PyThreadState_GetFrame, PyFrame_GetBack and PyFrame_GetCode return
+  // strong references. Hold each in an nb::object so it is released on every
+  // exit path: `continue`, reaching the frame limit, or a thrown exception.
+  auto ownFrame = [](PyFrameObject *f) {
+    return nb::steal<nb::object>(reinterpret_cast<PyObject *>(f));
+  };
+  for (nb::object frame = ownFrame(PyThreadState_GetFrame(tstate));
+       frame.is_valid() && count < framesLimit;
+       frame = ownFrame(PyFrame_GetBack(
+           reinterpret_cast<PyFrameObject *>(frame.ptr())))) {
+    auto *pyFrame = reinterpret_cast<PyFrameObject *>(frame.ptr());
+    nb::object codeOwner = nb::steal<nb::object>(
+        reinterpret_cast<PyObject *>(PyFrame_GetCode(pyFrame)));
+    auto *code = reinterpret_cast<PyCodeObject *>(codeOwner.ptr());
+    auto fileNameStr =
+        nb::cast<std::string>(nb::borrow<nb::str>(code->co_filename));
+    llvm::StringRef fileName(fileNameStr);
+    if (!PyGlobals::get().isUserTracebackFilename(fileName))
+      continue;
+
+#if PY_VERSION_HEX < 0x030b00f0
+    std::string name =
+        nb::cast<std::string>(nb::borrow<nb::str>(code->co_name));
+    llvm::StringRef funcName(name);
+    int startLine = PyFrame_GetLineNumber(pyFrame);
+    MlirLocation loc =
+        mlirLocationFileLineColGet(ctx, wrap(fileName), startLine, 0);
+#else
+    // co_qualname added in py3.11
+    std::string name =
+        nb::cast<std::string>(nb::borrow<nb::str>(code->co_qualname));
+    llvm::StringRef funcName(name);
+    int startLine, startCol, endLine, endCol;
+    int lasti = PyFrame_GetLasti(pyFrame);
+    if (!PyCode_Addr2Location(code, lasti, &startLine, &startCol, &endLine,
+                              &endCol)) {
+      throw nb::python_error();
+    }
+    MlirLocation loc = mlirLocationFileLineColRangeGet(
+        ctx, wrap(fileName), startLine, startCol, endLine, endCol);
+#endif
+
+    frames[count] = mlirLocationNameGet(ctx, wrap(funcName), loc);
+    ++count;
+  }
+
+  if (count == 0)
+    return mlirLocationUnknownGet(ctx);
+
+  // Fold from the outermost recorded frame inwards: each step makes
+  // frames[i] the callee of the call site built so far. With one recorded
+  // frame the loop body never runs and that frame is returned as is.
+  MlirLocation result = frames[count - 1];
+  for (size_t i = count - 1; i-- > 0;)
+    result = mlirLocationCallSiteGet(frames[i], result);
+  return result;
+}
+
 } // namespace
 
 //------------------------------------------------------------------------------
@@ -3240,8 +3319,20 @@ void mlir::python::populateIRCore(nb::module_ &m) {
           kModuleParseDocstring)
       .def_static(
           "create",
-          [](DefaultingPyLocation loc) {
-            MlirModule module = mlirModuleCreateEmpty(loc);
+          [](std::optional<PyLocation> loc) {
+            // An explicit `loc` wins; otherwise use a traceback-derived
+            // location when enabled, else DefaultingPyLocation::resolve().
+            MlirLocation mlirLoc;
+            if (loc.has_value()) {
+              mlirLoc = *loc;
+            } else if (PyGlobals::get().locTracebacksEnabled()) {
+              mlirLoc =
+                  tracebackToLocation(DefaultingPyMlirContext::resolve().get());
+            } else {
+              mlirLoc = DefaultingPyLocation::resolve();
+            }
+            assert(!mlirLocationIsNull(mlirLoc) && "expected non-null mlirLoc");
+            MlirModule module = mlirModuleCreateEmpty(mlirLoc);
             return PyModule::forModule(module).releaseObject();
           },
           nb::arg("loc").none() = nb::none(), "Creates an empty module")
@@ -3454,7 +3541,7 @@ void mlir::python::populateIRCore(nb::module_ &m) {
              std::optional<std::vector<PyValue *>> operands,
              std::optional<nb::dict> attributes,
              std::optional<std::vector<PyBlock *>> successors, int regions,
-             DefaultingPyLocation location, const nb::object &maybeIp,
+             std::optional<PyLocation> location, const nb::object &maybeIp,
              bool inferType) {
             // Unpack/validate operands.
             llvm::SmallVector<MlirValue, 4> mlirOperands;
@@ -3467,9 +3554,23 @@ void mlir::python::populateIRCore(nb::module_ &m) {
               }
             }
 
+            // An explicit `location` wins; otherwise use a traceback-derived
+            // location when enabled, else DefaultingPyLocation::resolve().
+            MlirLocation mlirLoc;
+            if (location.has_value()) {
+              mlirLoc = *location;
+            } else if (PyGlobals::get().locTracebacksEnabled()) {
+              mlirLoc =
+                  tracebackToLocation(DefaultingPyMlirContext::resolve().get());
+            } else {
+              mlirLoc = DefaultingPyLocation::resolve();
+            }
+            assert(!mlirLocationIsNull(mlirLoc) && "expected non-null mlirLoc");
+            PyMlirContextRef ctx =
+                PyMlirContext::forContext(mlirLocationGetContext(mlirLoc));
             return PyOperation::create(name, results, mlirOperands, attributes,
-                                       successors, regions, location, maybeIp,
-                                       inferType);
+                                       successors, regions, {ctx, mlirLoc},
+                                       maybeIp, inferType);
           },
           nb::arg("name"), nb::arg("results").none() = nb::none(),
           nb::arg("operands").none() = nb::none(),
@@ -3512,12 +3611,28 @@ void mlir::python::populateIRCore(nb::module_ &m) {
                  std::optional<nb::list> resultTypeList, nb::list operandList,
                  std::optional<nb::dict> attributes,
                  std::optional<std::vector<PyBlock *>> successors,
-                 std::optional<int> regions, DefaultingPyLocation location,
+                 std::optional<int> regions, std::optional<PyLocation> location,
                  const nb::object &maybeIp) {
+                // An explicit `location` wins; otherwise use a
+                // traceback-derived location when enabled, else
+                // DefaultingPyLocation::resolve().
+                MlirLocation mlirLoc;
+                if (location.has_value()) {
+                  mlirLoc = *location;
+                } else if (PyGlobals::get().locTracebacksEnabled()) {
+                  mlirLoc = tracebackToLocation(
+                      DefaultingPyMlirContext::resolve().get());
+                } else {
+                  mlirLoc = DefaultingPyLocation::resolve();
+                }
+                assert(!mlirLocationIsNull(mlirLoc) &&
+                       "expected non-null mlirLoc");
+                PyMlirContextRef ctx =
+                    PyMlirContext::forContext(mlirLocationGetContext(mlirLoc));
                 new (self) PyOpView(PyOpView::buildGeneric(
                     name, opRegionSpec, operandSegmentSpecObj,
                     resultSegmentSpecObj, resultTypeList, operandList,
-                    attributes, successors, regions, location, maybeIp));
+                    attributes, successors, regions, {ctx, mlirLoc}, maybeIp));
               },
               nb::arg("name"), nb::arg("opRegionSpec"),
               nb::arg("operandSegmentSpecObj").none() = nb::none(),
@@ -3551,17 +3663,32 @@ void mlir::python::populateIRCore(nb::module_ &m) {
       [](nb::handle cls, std::optional<nb::list> resultTypeList,
          nb::list operandList, std::optional<nb::dict> attributes,
          std::optional<std::vector<PyBlock *>> successors,
-         std::optional<int> regions, DefaultingPyLocation location,
+         std::optional<int> regions, std::optional<PyLocation> location,
          const nb::object &maybeIp) {
         std::string name = nb::cast<std::string>(cls.attr("OPERATION_NAME"));
         std::tuple<int, bool> opRegionSpec =
             nb::cast<std::tuple<int, bool>>(cls.attr("_ODS_REGIONS"));
         nb::object operandSegmentSpec = cls.attr("_ODS_OPERAND_SEGMENTS");
         nb::object resultSegmentSpec = cls.attr("_ODS_RESULT_SEGMENTS");
+
+        // An explicit `location` wins; otherwise use a traceback-derived
+        // location when enabled, else DefaultingPyLocation::resolve().
+        MlirLocation mlirLoc;
+        if (location.has_value()) {
+          mlirLoc = *location;
+        } else if (PyGlobals::get().locTracebacksEnabled()) {
+          mlirLoc =
+              tracebackToLocation(DefaultingPyMlirContext::resolve().get());
+        } else {
+          mlirLoc = DefaultingPyLocation::resolve();
+        }
+        assert(!mlirLocationIsNull(mlirLoc) && "expected non-null mlirLoc");
+        PyMlirContextRef ctx =
+            PyMlirContext::forContext(mlirLocationGetContext(mlirLoc));
         return PyOpView::buildGeneric(name, opRegionSpec, operandSegmentSpec,
                                       resultSegmentSpec, resultTypeList,
                                       operandList, attributes, successors,
-                                      regions, location, maybeIp);
+                                      regions, {ctx, mlirLoc}, maybeIp);
       },
       nb::arg("cls"), nb::arg("results").none() = nb::none(),
       nb::arg("operands").none() = nb::none(),
diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h
index 9c22dea157c06..87e1a0b12da00 100644
--- a/mlir/lib/Bindings/Python/IRModule.h
+++ b/mlir/lib/Bindings/Python/IRModule.h
@@ -722,8 +722,7 @@ class PyOperation : public PyOperationBase, public BaseContextObject {
          llvm::ArrayRef<MlirValue> operands,
          std::optional<nanobind::dict> attributes,
          std::optional<std::vector<PyBlock *>> successors, int regions,
-         DefaultingPyLocation location, const nanobind::object &ip,
-         bool inferType);
+         PyLocation location, const nanobind::object &ip, bool inferType);
 
   /// Creates an OpView suitable for this operation.
   nanobind::object createOpView();
@@ -781,7 +780,7 @@ class PyOpView : public PyOperationBase {
                nanobind::list operandList,
                std::optional<nanobind::dict> attributes,
                std::optional<std::vector<PyBlock *>> successors,
-               std::optional<int> regions, DefaultingPyLocation location,
+               std::optional<int> regions, PyLocation location,
                const nanobind::object &maybeIp);
 
   /// Construct an instance of a class deriving from OpView, bypassing its
diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp
index 6f49431006605..25174dff4f9b8 100644
--- a/mlir/lib/Bindings/Python/MainModule.cpp
+++ b/mlir/lib/Bindings/Python/MainModule.cpp
@@ -6,7 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "Globals.h"
 #include "IRModule.h"
 #include "NanobindUtils.h"
@@ -44,7 +43,15 @@ NB_MODULE(_mlir, m) {
       .def("_register_operation_impl", &PyGlobals::registerOperationImpl,
            "operation_name"_a, "operation_class"_a, nb::kw_only(),
            "replace"_a = false,
-           "Testing hook for directly registering an operation");
+           "Testing hook for directly registering an operation")
+      .def("loc_tracebacks_enabled", &PyGlobals::locTracebacksEnabled)
+      .def("set_loc_tracebacks_enabled", &PyGlobals::setLocTracebacksEnabled)
+      .def("set_loc_tracebacks_frame_limit",
+           &PyGlobals::setLocTracebackFramesLimit)
+      .def("register_traceback_file_inclusion",
+           &PyGlobals::registerTracebackFileInclusion)
+      .def("register_traceback_file_exclusion",
+           &PyGlobals::registerTracebackFileExclusion);
 
   // Aside from making the globals accessible to python, having python manage
   // it is necessary to make sure it is destroyed (and releases its python
diff --git a/mlir/test/python/ir/auto_location.py b/mlir/test/python/ir/auto_location.py
new file mode 100644
index 0000000000000..1b7f615728cc8
--- /dev/null
+++ b/mlir/test/python/ir/auto_location.py
@@ -0,0 +1,58 @@
+# RUN: %PYTHON %s | FileCheck %s
+
+import gc
+from contextlib import contextmanager
+
+from mlir.ir import *
+from mlir.dialects._ods_common import _cext
+from mlir.dialects import arith, _arith_ops_gen
+
+
+def run(f):
+    """Run `f` under a TEST banner, then assert no Context remains alive."""
+    print("\nTEST:", f.__name__)
+    f()
+    gc.collect()
+    assert Context._get_live_count() == 0
+
+
+@contextmanager
+def with_infer_location():
+    """Enable traceback-derived locations for the body, then disable them."""
+    _cext.globals.set_loc_tracebacks_enabled(True)
+    try:
+        yield
+    finally:
+        _cext.globals.set_loc_tracebacks_enabled(False)
+
+
+# CHECK-LABEL: TEST: testInferLocations
+def testInferLocations():
+    with Context() as ctx, Location.unknown(), with_infer_location():
+        ctx.allow_unregistered_dialects = True
+        op = Operation.create("custom.op1")
+        one = arith.constant(IndexType.get(), 1)
+        _cext.globals.register_traceback_file_exclusion(arith.__file__)
+        _cext.globals.register_traceback_file_exclusion(_arith_ops_gen.__file__)
+        two = arith.constant(IndexType.get(), 2)
+
+        # CHECK: loc(callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":33:13 to :43)
+        # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":14:4 to :7)
+        # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":58:0 to :23))))
+        print(op.location)
+
+        # CHECK: loc(callsite("ConstantOp.__init__"("{{.*}}/mlir/dialects/_arith_ops_gen.py":404:4 to :218)
+        # CHECK-SAME: at callsite("ConstantOp.__init__"("{{.*}}/mlir/dialects/arith.py":65:12 to :76)
+        # CHECK-SAME: at callsite("constant"("{{.*}}/mlir/dialects/arith.py":110:40 to :81)
+        # CHECK-SAME: at callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":34:14 to :48)
+        # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":14:4 to :7)
+        # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":58:0 to :23)))))))
+        print(one.location)
+
+        # CHECK: loc(callsite("testInferLocations"("{{.*}}/test/python/ir/auto_location.py":37:14 to :48)
+        # CHECK-SAME: at callsite("run"("{{.*}}/test/python/ir/auto_location.py":14:4 to :7)
+        # CHECK-SAME: at "<module>"("{{.*}}/test/python/ir/auto_location.py":58:0 to :23))))
+        print(two.location)
+
+
+run(testInferLocations)
diff --git a/mlir/test/python/ir/lit.local.cfg b/mlir/test/python/ir/lit.local.cfg
new file mode 100644
index 0000000000000..f59e80bc93ab8
--- /dev/null
+++ b/mlir/test/python/ir/lit.local.cfg
@@ -0,0 +1,2 @@
+if "Windows" in config.host_os:  # NOTE(review): presumably because the CHECK lines use '/' paths -- confirm
+    config.excludes.add("auto_location.py")  # exclude auto_location.py from this directory's tests



More information about the Mlir-commits mailing list