[Mlir-commits] [mlir] Added free-threading CPython mode support in MLIR Python bindings (PR #107103)

Fri Jan 10 07:07:10 PST 2025

================
@@ -0,0 +1,518 @@
+# RUN: %PYTHON %s
+"""
+This script generates multi-threaded tests to check free-threading mode using CPython compiled with TSAN.
+Tests can be run using pytest:
+```bash
+python3.13t -mpytest -vvv multithreaded_tests.py
+```
+
+IMPORTANT: these tests do not check correctness. They only execute the existing tests in a multi-threaded context,
+passing if TSAN reports no warnings and failing otherwise.
+
+
+Details on the generated tests and execution:
+1) Multi-threaded execution: all generated tests are executed independently by
+a pool of threads, running each test multiple times, see @multi_threaded for details
+
+2) Test generation: we use the existing tests (test/python/ir/*.py,
+test/python/dialects/*.py, etc.) to generate multi-threaded tests.
+In detail, we perform the following:
+a) we define a list of source tests to be used to generate multi-threaded tests, see `TEST_MODULES`.
+b) we define `TestAllMultiThreaded` class and add existing tests to the class. See `add_existing_tests` method.
+c) for each test file, we copy and modify it: test/python/ir/affine_expr.py -> /tmp/ir/affine_expr.py.
+To import the copied test file as a Python module, we remove all lines that execute the tests, such as
+`@run` or `run(testMethod)`. See the `copy_and_update` and `add_existing_tests` functions for details.
+
+
+Observed warnings reported by TSAN.
+
+CPython and free-threading known data-races:
+1) ctypes related races: https://github.com/python/cpython/issues/127945
+2) LLVM related data-races, llvm::raw_ostream is not thread-safe
+- mlir pass manager
+- dialects/transform_interpreter.py
+- ir/diagnostic_handler.py
+- ir/module.py
+3) Dialect gpu module-to-binary method is unsafe
+"""
+import concurrent.futures
+import gc
+import importlib.util
+import os
+import sys
+import threading
+import tempfile
+import unittest
+
+from contextlib import contextmanager
+from functools import partial
+from pathlib import Path
+from typing import Optional
+
+import mlir.dialects.arith as arith
+from mlir.dialects import transform
+from mlir.ir import Context, Location, Module, IntegerType, InsertionPoint
+
+
+def import_from_path(module_name: str, file_path: Path):
+    """Load the Python file at ``file_path`` as a module named ``module_name``.
+
+    The module object is stored in ``sys.modules`` under ``module_name``
+    before its code is executed, and is then returned.
+    """
+    module_spec = importlib.util.spec_from_file_location(module_name, file_path)
+    loaded = importlib.util.module_from_spec(module_spec)
+    # Register first, execute second: same order as a regular import.
+    sys.modules[module_name] = loaded
+    module_spec.loader.exec_module(loaded)
+    return loaded
+
+
+def copy_and_update(src_filepath: Path, dst_filepath: Path):
+    """Copy ``src_filepath`` to ``dst_filepath``, dropping test-executing lines.
+
+    A line is dropped when it starts, at column 0, with one of the prefixes
+    listed below (calls such as ``run(testMethod)`` and decorators such as
+    ``@run``). Indented lines and all other lines are written unchanged, so
+    the copied file can be imported without running its tests.
+    """
+    # Built once per call; `str.startswith` accepts a tuple of prefixes.
+    skip_prefixes = (
+        "run(",
+        "@run",
+        "@constructAndPrintInModule",
+        "run_apply_patterns(",
+        "@run_apply_patterns",
+        "@test_in_context",
+        "@construct_and_print_in_module",
+    )
+    with open(src_filepath, "r") as reader, open(dst_filepath, "w") as writer:
+        writer.writelines(
+            src_line for src_line in reader if not src_line.startswith(skip_prefixes)
+        )
+
+
+# Helper run functions
+def run(f):
+    """Call ``f`` with no arguments; its result is discarded."""
+    f()
+
+
+def run_with_context_and_location(f):
+    """Call ``f()`` with a new ``Context`` and an unknown ``Location`` active.
+
+    The test name is printed before the context is created. Returns ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context():
+        with Location.unknown():
+            f()
+    return f
+
+
+def run_with_insertion_point(f):
+    """Call ``f(ctx)`` with the insertion point set to a new module's body.
+
+    Prints the test name, then creates a ``Context`` (passed to ``f``), an
+    unknown ``Location`` and a new ``Module``. The module is printed once
+    ``f`` has returned. This helper returns ``None``, not ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context() as ctx:
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                f(ctx)
+            print(module)
+
+
+def run_with_insertion_point_v2(f):
+    """Call ``f()`` with the insertion point set to a new module's body.
+
+    Prints the test name, runs ``f`` under a new ``Context`` and an unknown
+    ``Location``, then prints the module. Returns ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context():
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                f()
+            print(module)
+    return f
+
+
+def run_with_insertion_point_v3(f):
+    """Call ``f(module)`` with the insertion point set to that module's body.
+
+    The test name is printed after the insertion point has been entered,
+    immediately before ``f`` is called. The module is printed afterwards.
+    Returns ``f``.
+    """
+    with Context():
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                print("\nTEST:", f.__name__)
+                f(module)
+            print(module)
+    return f
+
+
+def run_with_insertion_point_v4(f):
+    """Call ``f()`` in a new module of a context that allows unregistered dialects.
+
+    Prints the test name, sets ``allow_unregistered_dialects`` on the new
+    ``Context``, and runs ``f`` with the insertion point set to the module
+    body. The module is not printed. Returns ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context() as ctx:
+        with Location.unknown():
+            ctx.allow_unregistered_dialects = True
+            module = Module.create()
+            with InsertionPoint(module.body):
+                f()
+    return f
+
+
+def run_apply_patterns(f):
+    """Call ``f()`` inside the patterns region of a ``transform.ApplyPatternsOp``.
+
+    Builds a module containing a ``transform.SequenceOp`` whose body holds an
+    ``ApplyPatternsOp`` on the sequence's ``bodyTarget``, followed by a
+    ``YieldOp``. ``f`` runs with the insertion point set to
+    ``apply.patterns``. The test name and the module are printed after the
+    module has been built. Returns ``f``.
+    """
+    with Context():
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                sequence = transform.SequenceOp(
+                    transform.FailurePropagationMode.Propagate,
+                    [],
+                    transform.AnyOpType.get(),
+                )
+                with InsertionPoint(sequence.body):
+                    apply = transform.ApplyPatternsOp(sequence.bodyTarget)
+                    with InsertionPoint(apply.patterns):
+                        f()
+                    transform.YieldOp()
+            print("\nTEST:", f.__name__)
+            print(module)
+    return f
+
+
+def run_transform_tensor_ext(f):
+    """Call ``f(sequence.bodyTarget)`` inside the body of a ``transform.SequenceOp``.
+
+    Prints the test name, builds a module containing the sequence op, runs
+    ``f`` with the insertion point set to ``sequence.body``, appends a
+    ``YieldOp`` and prints the module. Returns ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context():
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                sequence = transform.SequenceOp(
+                    transform.FailurePropagationMode.Propagate,
+                    [],
+                    transform.AnyOpType.get(),
+                )
+                with InsertionPoint(sequence.body):
+                    f(sequence.bodyTarget)
+                    transform.YieldOp()
+            print(module)
+    return f
+
+
+def run_transform_structured_ext(f):
+    """Call ``f()`` in a new module, then verify and print the module.
+
+    The test name is printed after the insertion point has been entered,
+    immediately before ``f`` is called. ``module.operation.verify()`` is
+    called once the insertion point has been left. Returns ``f``.
+    """
+    with Context():
+        with Location.unknown():
+            module = Module.create()
+            with InsertionPoint(module.body):
+                print("\nTEST:", f.__name__)
+                f()
+            module.operation.verify()
+            print(module)
+    return f
+
+
+def run_construct_and_print_in_module(f):
+    """Call ``f(module)`` in a new module and print whatever ``f`` returns.
+
+    Prints the test name, then runs ``f`` with the insertion point set to
+    the new module's body. The value returned by ``f`` is printed unless it
+    is ``None``. Returns ``f``.
+    """
+    print("\nTEST:", f.__name__)
+    with Context():
+        with Location.unknown():
+            new_module = Module.create()
+            with InsertionPoint(new_module.body):
+                result = f(new_module)
+            if result is not None:
+                print(result)
+    return f
+
+
+# Source test modules used to generate the multi-threaded tests.
+# Each entry is a tuple of 2 or 3 items:
+#   (module path without ".py", runner function[, extra test-name patterns])
+# The optional third item lists name patterns for test functions in that
+# module whose names do not match `test*`.
+TEST_MODULES = [
+    ("execution_engine", run),
+    ("pass_manager", run),
+    ("dialects/affine", run_with_insertion_point_v2),
+    ("dialects/func", run_with_insertion_point_v2),
+    ("dialects/arith_dialect", run),
+    ("dialects/arith_llvm", run),
+    ("dialects/async_dialect", run),
+    ("dialects/builtin", run),
+    ("dialects/cf", run_with_insertion_point_v4),
+    ("dialects/complex_dialect", run),
+    ("dialects/index_dialect", run_with_insertion_point),
+    ("dialects/llvm", run_with_insertion_point_v2),
+    ("dialects/math_dialect", run),
+    ("dialects/memref", run),
+    ("dialects/ml_program", run_with_insertion_point_v2),
+    ("dialects/nvgpu", run_with_insertion_point_v2),
+    ("dialects/nvvm", run_with_insertion_point_v2),
+    ("dialects/ods_helpers", run),
+    ("dialects/openmp_ops", run_with_insertion_point_v2),
+    ("dialects/pdl_ops", run_with_insertion_point_v2),
+    # ("dialects/python_test", run),  # TODO: Need to pass pybind11 or nanobind argv
+    ("dialects/quant", run),
+    ("dialects/rocdl", run_with_insertion_point_v2),
+    ("dialects/scf", run_with_insertion_point_v2),
+    ("dialects/shape", run),
+    ("dialects/spirv_dialect", run),
+    ("dialects/tensor", run),
+    # ("dialects/tosa", ),  # Nothing to test
+    ("dialects/transform_bufferization_ext", run_with_insertion_point_v2),
+    # ("dialects/transform_extras", ),  # Needs a more complicated execution schema
+    ("dialects/transform_gpu_ext", run_transform_tensor_ext),
+    (
+        "dialects/transform_interpreter",
+        run_with_context_and_location,
+        ["print_", "transform_options", "failed", "include"],
+    ),
+    (
+        "dialects/transform_loop_ext",
+        run_with_insertion_point_v2,
+        ["loopOutline"],
+    ),
+    ("dialects/transform_memref_ext", run_with_insertion_point_v2),
+    ("dialects/transform_nvgpu_ext", run_with_insertion_point_v2),
+    ("dialects/transform_sparse_tensor_ext", run_transform_tensor_ext),
+    ("dialects/transform_structured_ext", run_transform_structured_ext),
+    ("dialects/transform_tensor_ext", run_transform_tensor_ext),
+    (
+        "dialects/transform_vector_ext",
+        run_apply_patterns,
+        ["configurable_patterns"],
+    ),
+    ("dialects/transform", run_with_insertion_point_v3),
+    ("dialects/vector", run_with_context_and_location),
+    ("dialects/gpu/dialect", run_with_context_and_location),
+    ("dialects/gpu/module-to-binary-nvvm", run_with_context_and_location),
+    ("dialects/gpu/module-to-binary-rocdl", run_with_context_and_location),
+    ("dialects/linalg/ops", run),
+    # TO ADD: No proper tests in this dialects/linalg/opsdsl/*
+    # ("dialects/linalg/opsdsl/*", ...),
+    ("dialects/sparse_tensor/dialect", run),
+    ("dialects/sparse_tensor/passes", run),
+    ("integration/dialects/pdl", run_construct_and_print_in_module),
+    ("integration/dialects/transform", run_construct_and_print_in_module),
+    ("integration/dialects/linalg/opsrun", run),
+    ("ir/affine_expr", run),
+    ("ir/affine_map", run),
+    ("ir/array_attributes", run),
+    ("ir/attributes", run),
+    ("ir/blocks", run),
+    ("ir/builtin_types", run),
+    ("ir/context_managers", run),
+    ("ir/debug", run),
+    ("ir/diagnostic_handler", run),
+    ("ir/dialects", run),
+    ("ir/exception", run),
+    ("ir/insertion_point", run),
+    ("ir/integer_set", run),
+    ("ir/location", run),
+    ("ir/module", run),
+    ("ir/operation", run),
+    ("ir/symbol_table", run),
+    ("ir/value", run),
+]
+
+# Names of generated tests to skip. The comment above each entry gives the reason.
+# NOTE(review): presumably consumed by the test generator further down the file - confirm.
+TESTS_TO_SKIP = [
+    # testNanoTime can't run in multiple threads, even with GIL
+    "test_execution_engine__testNanoTime_multi_threaded",
+    # testSharedLibLoad can't run in multiple threads, even with GIL
+    "test_execution_engine__testSharedLibLoad_multi_threaded",
+    # RuntimeError: Value caster is already registered:
+    # <class 'dialects/arith_dialect.testArithValue.<locals>.ArithValue'>, even with GIL
+    "test_dialects_arith_dialect__testArithValue_multi_threaded",
+    # PyGlobals::setDialectSearchPrefixes is not thread-safe, even with GIL.
+    # Strange usage of static PyGlobals vs python exposed _cext.globals
+    "test_ir_dialects__testAppendPrefixSearchPath_multi_threaded",
+    # RuntimeError: Value caster is already registered:
+    # <function testValueCasters.<locals>.dont_cast_int, even with GIL
+    "test_ir_value__testValueCasters_multi_threaded",
+    #
+    # Tests indirectly calling thread-unsafe llvm::raw_ostream
+    #
+    # mlirExecutionEngineCreate calls thread-unsafe llvm::raw_ostream
+    "test_execution_engine__testInvalidModule_multi_threaded",
+    # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+    "test_pass_manager__testPrintIrAfterAll_multi_threaded",
+    # IRPrinterInstrumentation::runBeforePass calls thread-unsafe llvm::raw_ostream
+    "test_pass_manager__testPrintIrBeforeAndAfterAll_multi_threaded",
+    # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+    "test_pass_manager__testPrintIrLargeLimitElements_multi_threaded",
+    # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+    "test_pass_manager__testPrintIrTree_multi_threaded",
+    # PrintOpStatsPass::printSummary calls thread-unsafe llvm::raw_ostream
+    "test_pass_manager__testRunPipeline_multi_threaded",
+    # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+    "test_dialects_transform_interpreter__include_multi_threaded",
+    # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+    "test_dialects_transform_interpreter__transform_options_multi_threaded",
+    # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+    "test_dialects_transform_interpreter__print_self_multi_threaded",
+    # mlirEmitError calls thread-unsafe llvm::raw_ostream
+    "test_ir_diagnostic_handler__testDiagnosticCallbackException_multi_threaded",
+    # mlirOperationDump calls thread-unsafe llvm::raw_ostream
+    "test_ir_module__testParseSuccess_multi_threaded",
+    #
+    # False-positive TSAN detected race in llvm::RuntimeDyldELF::registerEHFrames()
+    # Details: https://github.com/llvm/llvm-project/pull/107103/files#r1905726947
+    #
+    "test_execution_engine__testCapsule_multi_threaded",
+    "test_execution_engine__testDumpToObjectFile_multi_threaded",
+]
+
+# Names of generated tests expected to fail. Comments give the reason where one is recorded.
+# NOTE(review): presumably consumed by the test generator further down the file - confirm.
+TESTS_TO_XFAIL = [
+    #
+    # execution_engine tests:
+    # - ctypes related data-races: https://github.com/python/cpython/issues/127945
+    #
+    "test_execution_engine__testBF16Memref_multi_threaded",
+    "test_execution_engine__testBasicCallback_multi_threaded",
+    "test_execution_engine__testComplexMemrefAdd_multi_threaded",
+    "test_execution_engine__testComplexUnrankedMemrefAdd_multi_threaded",
+    "test_execution_engine__testDynamicMemrefAdd2D_multi_threaded",
+    "test_execution_engine__testF16MemrefAdd_multi_threaded",
+    "test_execution_engine__testF8E5M2Memref_multi_threaded",
+    "test_execution_engine__testInvokeFloatAdd_multi_threaded",
+    # a ctypes race
+    "test_execution_engine__testInvokeVoid_multi_threaded",
+    "test_execution_engine__testMemrefAdd_multi_threaded",
+    "test_execution_engine__testRankedMemRefCallback_multi_threaded",
+    "test_execution_engine__testRankedMemRefWithOffsetCallback_multi_threaded",
+    "test_execution_engine__testUnrankedMemRefCallback_multi_threaded",
+    "test_execution_engine__testUnrankedMemRefWithOffsetCallback_multi_threaded",
+    #
+    # dialects tests
+    #
+    # Related to ctypes data races
+    "test_dialects_memref__testSubViewOpInferReturnTypeExtensiveSlicing_multi_threaded",
+    # Fatal Python error: Aborted or
+    # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) is not thread-safe
+    "test_dialects_transform_interpreter__print_other_multi_threaded",
+    # Due to global llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp::GCNTrackers variable mutation
+    "test_dialects_gpu_module-to-binary-rocdl__testGPUToASMBin_multi_threaded",
+    # (no reason recorded for the next three entries)
+    "test_dialects_gpu_module-to-binary-nvvm__testGPUToASMBin_multi_threaded",
+    "test_dialects_gpu_module-to-binary-nvvm__testGPUToLLVMBin_multi_threaded",
+    "test_dialects_gpu_module-to-binary-rocdl__testGPUToLLVMBin_multi_threaded",
+    #
+    # integration tests: all related to ctypes data races
+    #
+    "test_integration_dialects_linalg_opsrun__test_elemwise_builtin_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_elemwise_generic_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_fill_builtin_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_fill_generic_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_fill_rng_builtin_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_fill_rng_generic_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_max_pooling_builtin_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_max_pooling_generic_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_min_pooling_builtin_multi_threaded",
+    "test_integration_dialects_linalg_opsrun__test_min_pooling_generic_multi_threaded",
+]
+
+
+def add_existing_tests(test_modules, test_prefix: str = "_original_test"):
+    def decorator(test_cls):
+        this_folder = Path(__file__).parent.absolute()
+        test_cls.output_folder = tempfile.TemporaryDirectory()
+        output_folder = Path(test_cls.output_folder.name)
+
+        for test_mod_info in test_modules:
+            assert isinstance(test_mod_info, tuple) and len(test_mod_info) in (2, 3)
----------------
vfdev-5 wrote:

These are implementation details of the current tests:
- 2 is the tuple's length in the basic case, for example `("ir/affine_expr", run)`, and
- 3 is used when we also want to include test functions from the test module whose names do not match `test*`:
```python
    (
        "dialects/transform_interpreter",
        run_with_context_and_location,
        ["print_", "transform_options", "failed", "include"],
    ),
```
I can add comments

https://github.com/llvm/llvm-project/pull/107103