[Mlir-commits] [mlir] Added free-threading CPython mode support in MLIR Python bindings (PR #107103)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Jan 10 07:07:10 PST 2025
================
@@ -0,0 +1,518 @@
+# RUN: %PYTHON %s
+"""
+This script generates multi-threaded tests to check free-threading mode using CPython compiled with TSAN.
+Tests can be run using pytest:
+```bash
+python3.13t -mpytest -vvv multithreaded_tests.py
+```
+
+IMPORTANT: these tests do not check correctness. They only execute the existing tests in a multi-threaded context;
+a test passes if TSAN reports no warnings and fails otherwise.
+
+
+Details on the generated tests and execution:
+1) Multi-threaded execution: all generated tests are executed independently by
+a pool of threads, running each test multiple times, see @multi_threaded for details
+
+2) Tests generation: we use existing tests: test/python/ir/*.py,
+test/python/dialects/*.py, etc to generate multi-threaded tests.
+In detail, we perform the following:
+a) we define a list of source tests to be used to generate multi-threaded tests, see `TEST_MODULES`.
+b) we define `TestAllMultiThreaded` class and add existing tests to the class. See `add_existing_tests` method.
+c) for each test file, we copy and modify it: test/python/ir/affine_expr.py -> /tmp/ir/affine_expr.py.
+In order to import the test file as python module, we remove all executing functions, like
+`@run` or `run(testMethod)`. See `copy_and_update` and `add_existing_tests` methods for details.
+
+
+Observed warnings reported by TSAN.
+
+CPython and free-threading known data-races:
+1) ctypes related races: https://github.com/python/cpython/issues/127945
+2) LLVM related data-races, llvm::raw_ostream is not thread-safe
+- mlir pass manager
+- dialects/transform_interpreter.py
+- ir/diagnostic_handler.py
+- ir/module.py
+3) Dialect gpu module-to-binary method is unsafe
+"""
+import concurrent.futures
+import gc
+import importlib.util
+import os
+import sys
+import threading
+import tempfile
+import unittest
+
+from contextlib import contextmanager
+from functools import partial
+from pathlib import Path
+from typing import Optional
+
+import mlir.dialects.arith as arith
+from mlir.dialects import transform
+from mlir.ir import Context, Location, Module, IntegerType, InsertionPoint
+
+
+def import_from_path(module_name: str, file_path: Path):
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = module
+ spec.loader.exec_module(module)
+ return module
+
+
+def copy_and_update(src_filepath: Path, dst_filepath: Path):
+ # We should remove all calls like `run(testMethod)`
+ with open(src_filepath, "r") as reader, open(dst_filepath, "w") as writer:
+ while True:
+ src_line = reader.readline()
+ if len(src_line) == 0:
+ break
+ skip_lines = [
+ "run(",
+ "@run",
+ "@constructAndPrintInModule",
+ "run_apply_patterns(",
+ "@run_apply_patterns",
+ "@test_in_context",
+ "@construct_and_print_in_module",
+ ]
+ if any(src_line.startswith(line) for line in skip_lines):
+ continue
+ writer.write(src_line)
+
+
+# Helper run functions
+def run(f):
+ """Call `f` with no arguments; its result is discarded."""
+ f()
+
+
+def run_with_context_and_location(f):
+ """Print the test name, call `f()` under a new `Context` and an unknown `Location`, and return `f`."""
+ print("\nTEST:", f.__name__)
+ with Context(), Location.unknown():
+ f()
+ return f
+
+
+def run_with_insertion_point(f):
+ """Print the test name, call `f(ctx)` at an insertion point in a new module's body, and print the module.
+
+ `ctx` is the `Context` entered for the call. There is no `return f` here,
+ so the helper returns None.
+ """
+ print("\nTEST:", f.__name__)
+ with Context() as ctx, Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ f(ctx)
+ print(module)
+
+
+def run_with_insertion_point_v2(f):
+ """Print the test name, call `f()` at an insertion point in a new module's body, print the module, and return `f`."""
+ print("\nTEST:", f.__name__)
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ f()
+ print(module)
+ return f
+
+
+def run_with_insertion_point_v3(f):
+ """Create a module, print the test name, call `f(module)` at an insertion point in its body, print the module, and return `f`."""
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ print("\nTEST:", f.__name__)
+ f(module)
+ print(module)
+ return f
+
+
+def run_with_insertion_point_v4(f):
+ """Print the test name, call `f()` at an insertion point in a new module's body, and return `f`.
+
+ The context has `allow_unregistered_dialects` set to True before the module
+ is created. This helper does not print the module.
+ """
+ print("\nTEST:", f.__name__)
+ with Context() as ctx, Location.unknown():
+ ctx.allow_unregistered_dialects = True
+ module = Module.create()
+ with InsertionPoint(module.body):
+ f()
+ return f
+
+
+def run_apply_patterns(f):
+ """Call `f()` with the insertion point set to the patterns region of a `transform.ApplyPatternsOp`, and return `f`.
+
+ A `transform.SequenceOp` is built in a new module and the
+ `ApplyPatternsOp` is created on the sequence's `bodyTarget`. A
+ `transform.YieldOp` is created after `f()` has run, then the test name and
+ the module are printed.
+ NOTE(review): indentation is flattened in this listing, so the exact `with`
+ block each trailing statement belongs to should be checked against the
+ original file.
+ """
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.AnyOpType.get(),
+ )
+ with InsertionPoint(sequence.body):
+ apply = transform.ApplyPatternsOp(sequence.bodyTarget)
+ with InsertionPoint(apply.patterns):
+ f()
+ transform.YieldOp()
+ print("\nTEST:", f.__name__)
+ print(module)
+ return f
+
+
+def run_transform_tensor_ext(f):
+ """Print the test name, call `f(sequence.bodyTarget)` with the insertion point in a `transform.SequenceOp` body, and return `f`.
+
+ The sequence op is built in a new module. A `transform.YieldOp` is created
+ after `f` has run, and the module is printed.
+ """
+ print("\nTEST:", f.__name__)
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.AnyOpType.get(),
+ )
+ with InsertionPoint(sequence.body):
+ f(sequence.bodyTarget)
+ transform.YieldOp()
+ print(module)
+ return f
+
+
+def run_transform_structured_ext(f):
+ """Print the test name, call `f()` at an insertion point in a new module's body, verify and print the module, and return `f`.
+
+ Verification is done with `module.operation.verify()` after `f` has run.
+ """
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ print("\nTEST:", f.__name__)
+ f()
+ module.operation.verify()
+ print(module)
+ return f
+
+
+def run_construct_and_print_in_module(f):
+ """Print the test name, call `f(module)` at an insertion point in a new module's body, and return `f`.
+
+ The value returned by `f` replaces the local `module`; it is printed only
+ when it is not None.
+ """
+ print("\nTEST:", f.__name__)
+ with Context(), Location.unknown():
+ module = Module.create()
+ with InsertionPoint(module.body):
+ module = f(module)
+ if module is not None:
+ print(module)
+ return f
+
+
+TEST_MODULES = [
+ ("execution_engine", run),
+ ("pass_manager", run),
+ ("dialects/affine", run_with_insertion_point_v2),
+ ("dialects/func", run_with_insertion_point_v2),
+ ("dialects/arith_dialect", run),
+ ("dialects/arith_llvm", run),
+ ("dialects/async_dialect", run),
+ ("dialects/builtin", run),
+ ("dialects/cf", run_with_insertion_point_v4),
+ ("dialects/complex_dialect", run),
+ ("dialects/func", run_with_insertion_point_v2),
+ ("dialects/index_dialect", run_with_insertion_point),
+ ("dialects/llvm", run_with_insertion_point_v2),
+ ("dialects/math_dialect", run),
+ ("dialects/memref", run),
+ ("dialects/ml_program", run_with_insertion_point_v2),
+ ("dialects/nvgpu", run_with_insertion_point_v2),
+ ("dialects/nvvm", run_with_insertion_point_v2),
+ ("dialects/ods_helpers", run),
+ ("dialects/openmp_ops", run_with_insertion_point_v2),
+ ("dialects/pdl_ops", run_with_insertion_point_v2),
+ # ("dialects/python_test", run), # TODO: Need to pass pybind11 or nanobind argv
+ ("dialects/quant", run),
+ ("dialects/rocdl", run_with_insertion_point_v2),
+ ("dialects/scf", run_with_insertion_point_v2),
+ ("dialects/shape", run),
+ ("dialects/spirv_dialect", run),
+ ("dialects/tensor", run),
+ # ("dialects/tosa", ), # Nothing to test
+ ("dialects/transform_bufferization_ext", run_with_insertion_point_v2),
+ # ("dialects/transform_extras", ), # Needs a more complicated execution schema
+ ("dialects/transform_gpu_ext", run_transform_tensor_ext),
+ (
+ "dialects/transform_interpreter",
+ run_with_context_and_location,
+ ["print_", "transform_options", "failed", "include"],
+ ),
+ (
+ "dialects/transform_loop_ext",
+ run_with_insertion_point_v2,
+ ["loopOutline"],
+ ),
+ ("dialects/transform_memref_ext", run_with_insertion_point_v2),
+ ("dialects/transform_nvgpu_ext", run_with_insertion_point_v2),
+ ("dialects/transform_sparse_tensor_ext", run_transform_tensor_ext),
+ ("dialects/transform_structured_ext", run_transform_structured_ext),
+ ("dialects/transform_tensor_ext", run_transform_tensor_ext),
+ (
+ "dialects/transform_vector_ext",
+ run_apply_patterns,
+ ["configurable_patterns"],
+ ),
+ ("dialects/transform", run_with_insertion_point_v3),
+ ("dialects/vector", run_with_context_and_location),
+ ("dialects/gpu/dialect", run_with_context_and_location),
+ ("dialects/gpu/module-to-binary-nvvm", run_with_context_and_location),
+ ("dialects/gpu/module-to-binary-rocdl", run_with_context_and_location),
+ ("dialects/linalg/ops", run),
+ # TO ADD: No proper tests in this dialects/linalg/opsdsl/*
+ # ("dialects/linalg/opsdsl/*", ...),
+ ("dialects/sparse_tensor/dialect", run),
+ ("dialects/sparse_tensor/passes", run),
+ ("integration/dialects/pdl", run_construct_and_print_in_module),
+ ("integration/dialects/transform", run_construct_and_print_in_module),
+ ("integration/dialects/linalg/opsrun", run),
+ ("ir/affine_expr", run),
+ ("ir/affine_map", run),
+ ("ir/array_attributes", run),
+ ("ir/attributes", run),
+ ("ir/blocks", run),
+ ("ir/builtin_types", run),
+ ("ir/context_managers", run),
+ ("ir/debug", run),
+ ("ir/diagnostic_handler", run),
+ ("ir/dialects", run),
+ ("ir/exception", run),
+ ("ir/insertion_point", run),
+ ("ir/integer_set", run),
+ ("ir/location", run),
+ ("ir/module", run),
+ ("ir/operation", run),
+ ("ir/symbol_table", run),
+ ("ir/value", run),
+]
+
+# Names of generated multi-threaded tests to skip. The trailing comment on each
+# entry (or the comment above a group of entries) records the reason.
+TESTS_TO_SKIP = [
+ "test_execution_engine__testNanoTime_multi_threaded", # testNanoTime can't run in multiple threads, even with GIL
+ "test_execution_engine__testSharedLibLoad_multi_threaded", # testSharedLibLoad can't run in multiple threads, even with GIL
+ "test_dialects_arith_dialect__testArithValue_multi_threaded", # RuntimeError: Value caster is already registered: <class 'dialects/arith_dialect.testArithValue.<locals>.ArithValue'>, even with GIL
+ "test_ir_dialects__testAppendPrefixSearchPath_multi_threaded", # PyGlobals::setDialectSearchPrefixes is not thread-safe, even with GIL. Strange usage of static PyGlobals vs python exposed _cext.globals
+ "test_ir_value__testValueCasters_multi_threaded", # RuntimeError: Value caster is already registered: <function testValueCasters.<locals>.dont_cast_int, even with GIL
+ # tests indirectly calling thread-unsafe llvm::raw_ostream
+ "test_execution_engine__testInvalidModule_multi_threaded", # mlirExecutionEngineCreate calls thread-unsafe llvm::raw_ostream
+ "test_pass_manager__testPrintIrAfterAll_multi_threaded", # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+ "test_pass_manager__testPrintIrBeforeAndAfterAll_multi_threaded", # IRPrinterInstrumentation::runBeforePass calls thread-unsafe llvm::raw_ostream
+ "test_pass_manager__testPrintIrLargeLimitElements_multi_threaded", # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+ "test_pass_manager__testPrintIrTree_multi_threaded", # IRPrinterInstrumentation::runAfterPass calls thread-unsafe llvm::raw_ostream
+ "test_pass_manager__testRunPipeline_multi_threaded", # PrintOpStatsPass::printSummary calls thread-unsafe llvm::raw_ostream
+ "test_dialects_transform_interpreter__include_multi_threaded", # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+ "test_dialects_transform_interpreter__transform_options_multi_threaded", # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+ "test_dialects_transform_interpreter__print_self_multi_threaded", # mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) calls thread-unsafe llvm::raw_ostream
+ "test_ir_diagnostic_handler__testDiagnosticCallbackException_multi_threaded", # mlirEmitError calls thread-unsafe llvm::raw_ostream
+ "test_ir_module__testParseSuccess_multi_threaded", # mlirOperationDump calls thread-unsafe llvm::raw_ostream
+ # False-positive TSAN detected race in llvm::RuntimeDyldELF::registerEHFrames()
+ # Details: https://github.com/llvm/llvm-project/pull/107103/files#r1905726947
+ "test_execution_engine__testCapsule_multi_threaded",
+ "test_execution_engine__testDumpToObjectFile_multi_threaded",
+]
+
+# Names of generated multi-threaded tests that are expected to fail, grouped
+# by source test module. Comments give the known cause where one is recorded.
+TESTS_TO_XFAIL = [
+ # execution_engine tests:
+ # - ctypes related data-races: https://github.com/python/cpython/issues/127945
+ "test_execution_engine__testBF16Memref_multi_threaded",
+ "test_execution_engine__testBasicCallback_multi_threaded",
+ "test_execution_engine__testComplexMemrefAdd_multi_threaded",
+ "test_execution_engine__testComplexUnrankedMemrefAdd_multi_threaded",
+ "test_execution_engine__testDynamicMemrefAdd2D_multi_threaded",
+ "test_execution_engine__testF16MemrefAdd_multi_threaded",
+ "test_execution_engine__testF8E5M2Memref_multi_threaded",
+ "test_execution_engine__testInvokeFloatAdd_multi_threaded",
+ "test_execution_engine__testInvokeVoid_multi_threaded", # a ctypes race
+ "test_execution_engine__testMemrefAdd_multi_threaded",
+ "test_execution_engine__testRankedMemRefCallback_multi_threaded",
+ "test_execution_engine__testRankedMemRefWithOffsetCallback_multi_threaded",
+ "test_execution_engine__testUnrankedMemRefCallback_multi_threaded",
+ "test_execution_engine__testUnrankedMemRefWithOffsetCallback_multi_threaded",
+ # dialects tests
+ "test_dialects_memref__testSubViewOpInferReturnTypeExtensiveSlicing_multi_threaded", # Related to ctypes data races
+ "test_dialects_transform_interpreter__print_other_multi_threaded", # Fatal Python error: Aborted or mlir::transform::PrintOp::apply(mlir::transform::TransformRewriter...) is not thread-safe
+ "test_dialects_gpu_module-to-binary-rocdl__testGPUToASMBin_multi_threaded", # Due to global llvm-project/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp::GCNTrackers variable mutation
+ "test_dialects_gpu_module-to-binary-nvvm__testGPUToASMBin_multi_threaded",
+ "test_dialects_gpu_module-to-binary-nvvm__testGPUToLLVMBin_multi_threaded",
+ "test_dialects_gpu_module-to-binary-rocdl__testGPUToLLVMBin_multi_threaded",
+ # integration tests
+ "test_integration_dialects_linalg_opsrun__test_elemwise_builtin_multi_threaded", # Related to ctypes data races
+ "test_integration_dialects_linalg_opsrun__test_elemwise_generic_multi_threaded", # Related to ctypes data races
+ "test_integration_dialects_linalg_opsrun__test_fill_builtin_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_fill_generic_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_fill_rng_builtin_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_fill_rng_generic_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_max_pooling_builtin_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_max_pooling_generic_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_min_pooling_builtin_multi_threaded", # ctypes
+ "test_integration_dialects_linalg_opsrun__test_min_pooling_generic_multi_threaded", # ctypes
+]
+
+
+def add_existing_tests(test_modules, test_prefix: str = "_original_test"):
+ def decorator(test_cls):
+ this_folder = Path(__file__).parent.absolute()
+ test_cls.output_folder = tempfile.TemporaryDirectory()
+ output_folder = Path(test_cls.output_folder.name)
+
+ for test_mod_info in test_modules:
+ assert isinstance(test_mod_info, tuple) and len(test_mod_info) in (2, 3)
----------------
vfdev-5 wrote:
These are implementation details of the current tests:
- 2 is the tuple's length for a regular entry, for example `("ir/affine_expr", run)`, and
- 3 is the length when we also want to include test functions from the test module whose names do not match `test*`:
```python
(
"dialects/transform_interpreter",
run_with_context_and_location,
["print_", "transform_options", "failed", "include"],
),
```
I can add comments
https://github.com/llvm/llvm-project/pull/107103
More information about the Mlir-commits
mailing list