[clang] [lld] [llvm] [RFC] Initial reference pass-plugin in LLVM (PR #171111)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 8 05:04:03 PST 2025
================
@@ -0,0 +1,464 @@
+//===- tools/plugins-shlib/pypass.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cstdlib>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <string>
+
+using namespace llvm;
+
+// Command-line override for the CPython shared library to load. Empty by
+// default, in which case findPython() consults LLVM_PYPASS_DYLIB instead.
+static cl::opt<std::string>
+    DylibPath("pypass-dylib", cl::init(""),
+              cl::desc("Path to the Python shared library"));
+
+// Command-line override for the Python script to execute. Empty by default,
+// in which case findScript() consults LLVM_PYPASS_SCRIPT instead.
+static cl::opt<std::string>
+    ScriptPath("pypass-script", cl::init(""),
+               cl::desc("Path to the Python script to run"));
+
+// Determine which CPython shared library to load. The -pypass-dylib option
+// takes precedence; otherwise the LLVM_PYPASS_DYLIB environment variable is
+// used. Returns an empty string when neither source provides a path.
+static std::string findPython() {
+  if (DylibPath.empty()) {
+    // TODO: Run Python from PATH and use a script to query the shared lib
+    const char *FromEnv = std::getenv("LLVM_PYPASS_DYLIB");
+    return FromEnv ? std::string(FromEnv) : std::string{};
+  }
+  return DylibPath;
+}
+
+// Determine which Python script to run. The -pypass-script option takes
+// precedence; otherwise the LLVM_PYPASS_SCRIPT environment variable is used.
+// Returns an empty string when neither source provides a path.
+static std::string findScript() {
+  if (ScriptPath.empty()) {
+    const char *FromEnv = std::getenv("LLVM_PYPASS_SCRIPT");
+    return FromEnv ? std::string(FromEnv) : std::string{};
+  }
+  return ScriptPath;
+}
+
+// Minimal binding to the CPython C API whose entry points are looked up at
+// runtime in a shared library rather than linked at build time.
+//
+// Construction loads the library returned by findPython(), resolves the
+// symbols listed below and initializes the interpreter. If any step fails the
+// object stays in the "not ready" state (see isReady()) and none of the API
+// pointers may be called. Destruction finalizes the interpreter if, and only
+// if, construction succeeded.
+struct PythonAPI {
+  // Signatures of the CPython functions used by this plugin. PyObject
+  // pointers are passed around as opaque void pointers so that no Python
+  // header is needed to build this file.
+  using Py_InitializeEx_t = void(int);
+  using Py_Initialize_t = void(void);
+  using Py_FinalizeEx_t = int(void);
+  using Py_Finalize_t = void(void);
+  using Py_DecRef_t = void(void *);
+  using PyDict_GetItemString_t = void *(void *, const char *);
+  using PyGILStateEnsure_t = int();
+  using PyGILStateRelease_t = void(int);
+  using PyImport_AddModule_t = void *(const char *);
+  using PyLong_FromVoidPtr_t = void *(void *);
+  using PyUnicode_FromString_t = void *(const char *);
+  using PyModule_GetDict_t = void *(void *);
+  using PyObject_CallObject_t = void *(void *, void *);
+  using PyObject_IsTrue_t = int(void *);
+  using PyRun_SimpleString_t = int(const char *);
+  // NOTE(review): CPython declares the tuple index/size parameters as
+  // Py_ssize_t, which is wider than long on LLP64 targets such as 64-bit
+  // Windows. Confirm these two signatures before relying on them there.
+  using PyTuple_SetItem_t = int(void *, long, void *);
+  using PyTuple_New_t = void *(long);
+  using PyTypeObject_t = void *;
+
+  // All pointers start out null so that resolve() can assert that each one is
+  // assigned exactly once and so that optional entry points can be tested.
+
+  // pylifecycle.h
+  Py_InitializeEx_t *Py_InitializeEx = nullptr; // optional, preferred
+  Py_Initialize_t *Py_Initialize = nullptr;
+  Py_FinalizeEx_t *Py_FinalizeEx = nullptr; // optional, preferred
+  Py_Finalize_t *Py_Finalize = nullptr;
+
+  // pythonrun.h
+  PyRun_SimpleString_t *PyRun_SimpleString = nullptr;
+
+  // pystate.h
+  PyGILStateEnsure_t *PyGILState_Ensure = nullptr;
+  PyGILStateRelease_t *PyGILState_Release = nullptr;
+
+  // import.h
+  PyImport_AddModule_t *PyImport_AddModule = nullptr;
+
+  // object.h
+  PyObject_IsTrue_t *PyObject_IsTrue = nullptr;
+  Py_DecRef_t *Py_DecRef = nullptr;
+
+  // moduleobject.h
+  PyModule_GetDict_t *PyModule_GetDict = nullptr;
+
+  // dictobject.h
+  PyDict_GetItemString_t *PyDict_GetItemString = nullptr;
+
+  // abstract.h
+  PyObject_CallObject_t *PyObject_CallObject = nullptr;
+
+  // longobject.h
+  PyLong_FromVoidPtr_t *PyLong_FromVoidPtr = nullptr;
+
+  // unicodeobject.h
+  PyUnicode_FromString_t *PyUnicode_FromString = nullptr;
+
+  // tupleobject.h
+  PyTuple_SetItem_t *PyTuple_SetItem = nullptr;
+  PyTuple_New_t *PyTuple_New = nullptr;
+
+  // Load the Python library, bind the API and start the interpreter. On any
+  // failure a diagnostic has already been printed and isReady() stays false.
+  PythonAPI() {
+    if (!loadDylib(findPython()))
+      return;
+    if (!resolveSymbols())
+      return;
+    // Prefer the Ex variant: the 0 argument asks Python not to install its
+    // own signal handlers in the host process.
+    if (Py_InitializeEx)
+      Py_InitializeEx(0);
+    else
+      Py_Initialize();
+    Ready = true;
+  }
+
+  // Shut the interpreter down, but only if the constructor started it. The
+  // exchange guarantees finalization happens at most once.
+  ~PythonAPI() {
+    if (!Ready.exchange(false))
+      return;
+    if (Py_FinalizeEx)
+      (void)Py_FinalizeEx();
+    else
+      Py_Finalize();
+  }
+
+  // Open the shared library at Path and keep it loaded for the lifetime of
+  // the process. Returns false and prints the loader error on failure.
+  bool loadDylib(std::string Path) {
+    std::string Err;
+    Dylib = sys::DynamicLibrary::getPermanentLibrary(Path.c_str(), &Err);
+    if (!Dylib.isValid()) {
+      errs() << "dlopen for '" << Path << "' failed: " << Err << "\n";
+      return false;
+    }
+
+    // Remembered for diagnostics: the path may come from the environment, in
+    // which case the -pypass-dylib option itself is empty.
+    LoadedPath = Path;
+    return true;
+  }
+
+  // Bind every API pointer declared above. Returns true when all required
+  // symbols were found; each missing required symbol is reported.
+  bool resolveSymbols() {
+    // The Ex lifecycle functions are used when present; their plain
+    // counterparts are required as the fallback.
+    resolve("Py_InitializeEx", &Py_InitializeEx, /*Required=*/false);
+    resolve("Py_FinalizeEx", &Py_FinalizeEx, /*Required=*/false);
+
+    bool Success = true;
+    Success &= resolve("Py_Initialize", &Py_Initialize);
+    Success &= resolve("Py_Finalize", &Py_Finalize);
+    Success &= resolve("Py_DecRef", &Py_DecRef);
+    Success &= resolve("PyGILState_Ensure", &PyGILState_Ensure);
+    Success &= resolve("PyGILState_Release", &PyGILState_Release);
+    Success &= resolve("PyRun_SimpleString", &PyRun_SimpleString);
+    Success &= resolve("PyImport_AddModule", &PyImport_AddModule);
+    Success &= resolve("PyModule_GetDict", &PyModule_GetDict);
+    Success &= resolve("PyDict_GetItemString", &PyDict_GetItemString);
+    Success &= resolve("PyObject_CallObject", &PyObject_CallObject);
+    Success &= resolve("PyObject_IsTrue", &PyObject_IsTrue);
+    Success &= resolve("PyLong_FromVoidPtr", &PyLong_FromVoidPtr);
+    Success &= resolve("PyUnicode_FromString", &PyUnicode_FromString);
+    Success &= resolve("PyTuple_SetItem", &PyTuple_SetItem);
+    Success &= resolve("PyTuple_New", &PyTuple_New);
+    return Success;
+  }
+
+  // True once the interpreter has been initialized successfully.
+  bool isReady() const { return Ready; }
+
+  // Execute the script at ScriptPath via runpy and merge its globals into the
+  // interpreter's main namespace. Returns false and prints a diagnostic if
+  // Python reports an error.
+  //
+  // NOTE(review): unlike addImportSearchPath() this does not take the GIL;
+  // presumably it is only called from the thread that initialized Python.
+  // Confirm against the callers.
+  bool loadScript(const std::string &ScriptPath) const {
+    // The path is embedded in a single-quoted Python literal, so backslashes
+    // (e.g. Windows paths) and quotes must be escaped.
+    const std::string LoadCmd = "import runpy\n"
+                                "globals().update(runpy.run_path('" +
+                                escapeForPyLiteral(ScriptPath) + "'))";
+
+    if (PyRun_SimpleString(LoadCmd.c_str()) != 0) {
+      errs() << "Failed to load script: " << ScriptPath << "\n";
+      return false;
+    }
+
+    return true;
+  }
+
+  // Append Path to Python's sys.path. Returns false and prints a diagnostic
+  // if Python reports an error.
+  bool addImportSearchPath(std::string Path) const {
+    const std::string LoadCmd = "import sys\n"
+                                "sys.path.append('" +
+                                escapeForPyLiteral(Path) + "')";
+    // Interpreter is not thread-safe
+    auto GIL = make_scope_exit(
+        [this, Lock = PyGILState_Ensure()]() { PyGILState_Release(Lock); });
+    if (PyRun_SimpleString(LoadCmd.c_str()) != 0) {
+      errs() << "Failed to add import search path: " << Path << "\n";
+      return false;
+    }
+
+    return true;
+  }
+
+  // Look up (or create) the module called Name and return its dictionary, or
+  // nullptr if the module could not be obtained. The result is what invoke()
+  // expects as its Mod argument.
+  void *addModule(const char *Name) const {
+    void *Mod = PyImport_AddModule(Name);
+    // PyImport_AddModule signals failure with a null pointer, which must not
+    // be handed on to PyModule_GetDict.
+    if (!Mod)
+      return nullptr;
+    return PyModule_GetDict(Mod);
+  }
+
+  // Very simple interface to execute a Python function.
+  //
+  // Mod is a module dictionary as returned by addModule(), Name the function
+  // to look up in it and Args an optional argument tuple (nullptr for none).
+  // Returns true only if the function exists, ran without raising and
+  // returned a truthy value.
+  bool invoke(void *Mod, const char *Name, void *Args = nullptr) const {
+    if (!Mod)
+      return false;
+    // Interpreter is not thread-safe: hold the GIL for every API call below,
+    // including the dictionary lookup.
+    auto GIL = make_scope_exit(
+        [this, Lock = PyGILState_Ensure()]() { PyGILState_Release(Lock); });
+    // If the function doesn't exist, we assume no. The lookup yields a
+    // borrowed reference, so there is nothing to release for Fn.
+    void *Fn = PyDict_GetItemString(Mod, Name);
+    if (!Fn)
+      return false;
+    // If we get no result, there was an error in Python
+    void *Result = PyObject_CallObject(Fn, Args);
+    if (!Result) {
+      errs() << "PyPassContext error: " << Name << "() failed\n";
+      return false;
+    }
+    // The call result is a new reference that we own; drop it once its truth
+    // value has been read.
+    const int Truth = PyObject_IsTrue(Result);
+    Py_DecRef(Result);
+    // A negative value means the truth test itself failed; that is an error,
+    // not a yes.
+    if (Truth < 0) {
+      errs() << "PyPassContext error: cannot interpret result of " << Name
+             << "()\n";
+      return false;
+    }
+    // If the result is truthy, then it's a yes
+    return Truth != 0;
+  }
+
+private:
+  sys::DynamicLibrary Dylib;
+  // Path handed to loadDylib(); used in missing-symbol diagnostics.
+  std::string LoadedPath;
+  std::atomic<bool> Ready{false};
+
+  // Escape Raw so it can be placed between single quotes in Python source.
+  static std::string escapeForPyLiteral(const std::string &Raw) {
+    std::string Out;
+    Out.reserve(Raw.size());
+    for (const char C : Raw) {
+      if (C == '\n') {
+        Out += "\\n";
+        continue;
+      }
+      if (C == '\r') {
+        Out += "\\r";
+        continue;
+      }
+      if (C == '\\' || C == '\'')
+        Out.push_back('\\');
+      Out.push_back(C);
+    }
+    return Out;
+  }
+
+  // Look up Name in the loaded library and store its address in *Var.
+  // Returns whether the symbol was found. A missing symbol is reported only
+  // when Required is true.
+  template <typename FnTy>
+  bool resolve(const char *Name, FnTy **Var, bool Required = true) {
+    assert(Dylib.isValid() && "dlopen shared library first");
+    assert(*Var == nullptr && "Resolve symbols once");
+    if (void *FnPtr = Dylib.getAddressOfSymbol(Name)) {
+      *Var = reinterpret_cast<FnTy *>(FnPtr);
+      return true;
+    }
+    if (Required)
+      errs() << "Missing required CPython API symbol '" << Name
+             << "' in: " << LoadedPath << "\n";
+    return false;
+  }
+};
+
+// Python interface is initialized on first access and it is shared across all
+// threads. It can be used like a state-less thread-safe object.
+const PythonAPI &getPyAPI() {
+ static const PythonAPI PyAPI;
----------------
serge-sans-paille wrote:
If this was a friend function of `PythonAPI` returning a (unique) pointer and `PythonAPI`'s constructors were private, we could remove the need for the thread-safe `Ready` member because static local variables guarantee a kind of thread-safety.
https://github.com/llvm/llvm-project/pull/171111
More information about the llvm-commits
mailing list