[clang] [clang-bindings] Add strict typing to clang Python bindings (#76664) (PR #78114)

via cfe-commits cfe-commits at lists.llvm.org
Sun Jan 14 13:32:38 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Jannick Kremer (DeinAlptraum)

<details>
<summary>Changes</summary>

This PR adds type annotations to the clang Python bindings, to the point that they pass a strict mypy check.
This uses Python features introduced up to version 3.7, thus ending support of the bindings for Python versions <= 3.6 (see also the discussion here: https://discourse.llvm.org/t/type-annotations-for-libclang-python-bindings/70644). I have therefore removed some compatibility code for older Python versions.

I tried to change as little as possible, but in some places it was impossible to pass the type check without changing semantics. Such changes mostly come down to things like returning an empty string instead of `None`.
Some changes might look unrelated at first glance, but everything was done specifically to resolve typing errors.

I've left comments in some other places where I'm unsure about my changes, especially where they affect semantics.



---

Patch is 151.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78114.diff


1 File Affected:

- (modified) clang/bindings/python/clang/cindex.py (+1733-1201) 


``````````diff
diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index d780ee353a133cb..809527e712acded 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -43,7 +43,7 @@
 Most object information is exposed using properties, when the underlying API
 call is efficient.
 """
-from __future__ import absolute_import, division, print_function
+from __future__ import annotations
 
 # TODO
 # ====
@@ -67,89 +67,690 @@
 import clang.enumerations
 
 import os
-import sys
-
-if sys.version_info[0] == 3:
-    # Python 3 strings are unicode, translate them to/from utf8 for C-interop.
-    class c_interop_string(c_char_p):
-        def __init__(self, p=None):
-            if p is None:
-                p = ""
-            if isinstance(p, str):
-                p = p.encode("utf8")
-            super(c_char_p, self).__init__(p)
-
-        def __str__(self):
-            return self.value
-
-        @property
-        def value(self):
-            if super(c_char_p, self).value is None:
-                return None
-            return super(c_char_p, self).value.decode("utf8")
-
-        @classmethod
-        def from_param(cls, param):
-            if isinstance(param, str):
-                return cls(param)
-            if isinstance(param, bytes):
-                return cls(param)
-            if param is None:
-                # Support passing null to C functions expecting char arrays
-                return None
-            raise TypeError(
-                "Cannot convert '{}' to '{}'".format(type(param).__name__, cls.__name__)
-            )
+from enum import Enum
+
+from typing import (
+    Any,
+    Callable,
+    cast as Tcast,
+    Generic,
+    Iterable,
+    Iterator,
+    Optional,
+    Sequence,
+    Type as TType,
+    TypeVar,
+    TYPE_CHECKING,
+    Union as TUnion,
+)
+from typing_extensions import Protocol, TypeAlias
+
+if TYPE_CHECKING:
+    from ctypes import _Pointer, _FuncPointer, _CArgObject
+    from io import TextIOWrapper
+
+    StrPath: TypeAlias = TUnion[str, os.PathLike[str]]
+    InMemoryFile: TypeAlias = (
+        "tuple[TUnion[str, os.PathLike[Any]], TUnion[str, TextIOWrapper]]"
+    )
+    LibFunc: TypeAlias = TUnion[
+        "tuple[str, Optional[list[Any]]]",
+        "tuple[str, Optional[list[Any]], Any]",
+        "tuple[str, Optional[list[Any]], Any, Callable[..., Any]]",
+    ]
+    CObjP: TypeAlias = _Pointer[Any]
 
-        @staticmethod
-        def to_python_string(x, *args):
-            return x.value
+    TSeq = TypeVar("TSeq", covariant=True)
 
-    def b(x):
-        if isinstance(x, bytes):
-            return x
-        return x.encode("utf8")
+    class NoSliceSequence(Protocol[TSeq]):
+        def __len__(self) -> int:
+            ...
 
-elif sys.version_info[0] == 2:
-    # Python 2 strings are utf8 byte strings, no translation is needed for
-    # C-interop.
-    c_interop_string = c_char_p
+        def __getitem__(self, key: int) -> TSeq:
+            ...
 
-    def _to_python_string(x, *args):
-        return x
 
-    c_interop_string.to_python_string = staticmethod(_to_python_string)
+class ClangLib(Protocol):
+    def clang_annotateTokens(
+        self, tu: TranslationUnit, token: _CArgObject, num: int, cursor: _CArgObject
+    ) -> None:
+        ...
 
-    def b(x):
-        return x
+    def clang_CompilationDatabase_dispose(self, cdb: CompilationDatabase) -> None:
+        ...
+
+    def clang_CompilationDatabase_fromDirectory(
+        self, buildDir: str, errorCode: _CArgObject
+    ) -> CompilationDatabase:
+        ...
+
+    def clang_CompilationDatabase_getAllCompileCommands(
+        self, cdb: CompilationDatabase
+    ) -> CompileCommands:
+        ...
+
+    def clang_CompilationDatabase_getCompileCommands(
+        self, cdb: CompilationDatabase, filename: str
+    ) -> CompileCommands:
+        ...
+
+    def clang_CompileCommands_dispose(self, ccmds: CObjP) -> None:
+        ...
+
+    def clang_CompileCommands_getCommand(self, ccmds: CObjP, key: int) -> CObjP:
+        ...
+
+    def clang_CompileCommands_getSize(self, ccmds: CObjP) -> c_uint:
+        ...
+
+    def clang_CompileCommand_getArg(self, cmd: CObjP, key: int) -> str:
+        ...
+
+    def clang_CompileCommand_getDirectory(self, cmd: CObjP) -> str:
+        ...
+
+    def clang_CompileCommand_getFilename(self, cmd: CObjP) -> str:
+        ...
+
+    def clang_CompileCommand_getNumArgs(self, cmd: CObjP) -> int:
+        ...
+
+    def clang_codeCompleteAt(
+        self,
+        tu: TranslationUnit,
+        filename: str,
+        line: int,
+        column: int,
+        unsaved_files: TUnion[int, Array[_CXUnsavedFile]],
+        num_unsaved_files: int,
+        options: int,
+    ) -> _Pointer[CCRStructure]:
+        ...
+
+    def clang_codeCompleteGetDiagnostic(
+        self, ccrs: CodeCompletionResults, key: int
+    ) -> Diagnostic:
+        ...
+
+    def clang_codeCompleteGetNumDiagnostics(self, ccrs: CodeCompletionResults) -> c_int:
+        ...
+
+    def clang_createIndex(self, excludeDecls: int, displayDiagnostics: int) -> CObjP:
+        ...
+
+    def clang_createTranslationUnit(self, index: Index, filename: str) -> CObjP:
+        ...
+
+    def clang_CXXConstructor_isConvertingConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isCopyConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isDefaultConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isMoveConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXField_isMutable(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isConst(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isDefaulted(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isDeleted(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isCopyAssignmentOperator(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isMoveAssignmentOperator(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isExplicit(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isPureVirtual(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isStatic(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isVirtual(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXRecord_isAbstract(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_EnumDecl_isScoped(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_defaultDiagnosticDisplayOptions(self) -> int:
+        ...
+
+    def clang_defaultSaveOptions(self, tu: TranslationUnit) -> c_uint:
+        ...
+
+    def clang_disposeCodeCompleteResults(self, tu: CodeCompletionResults) -> None:
+        ...
+
+    def clang_disposeDiagnostic(self, diag: Diagnostic) -> None:
+        ...
+
+    def clang_disposeIndex(self, index: Index) -> None:
+        ...
+
+    def clang_disposeString(self, string: _CXString) -> None:
+        ...
+
+    def clang_disposeTokens(
+        self, tu: TranslationUnit, tokens: _Pointer[Token], count: c_uint
+    ) -> None:
+        ...
+
+    def clang_disposeTranslationUnit(self, tu: TranslationUnit) -> None:
+        ...
+
+    def clang_equalCursors(self, self_c: Cursor, other_c: Cursor) -> bool:
+        ...
+
+    def clang_equalLocations(
+        self, self_loc: SourceLocation, other_loc: SourceLocation
+    ) -> bool:
+        ...
+
+    def clang_equalRanges(self, self_r: SourceRange, other_r: SourceRange) -> bool:
+        ...
+
+    def clang_equalTypes(self, self_t: Type, other_t: Type) -> bool:
+        ...
+
+    def clang_formatDiagnostic(self, diag: Diagnostic, options: int) -> str:
+        ...
+
+    def clang_getAddressSpace(self, type: Type) -> int:
+        ...
+
+    def clang_getArgType(self, parent: Type, key: int) -> Type:
+        ...
+
+    def clang_getArrayElementType(self, type: Type) -> Type:
+        ...
+
+    def clang_getArraySize(self, type: Type) -> int:
+        ...
+
+    def clang_getFieldDeclBitWidth(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCanonicalCursor(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCanonicalType(self, type: Type) -> Type:
+        ...
+
+    def clang_getChildDiagnostics(self, diag: Diagnostic) -> CObjP:
+        ...
+
+    def clang_getCompletionAvailability(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getCompletionBriefComment(self, cs_obj: CObjP) -> str:
+        ...
+
+    def clang_getCompletionChunkCompletionString(
+        self, cs_obj: CObjP, key: int
+    ) -> CObjP:
+        ...
+
+    def clang_getCompletionChunkKind(self, cs_obj: CObjP, key: int) -> int:
+        ...
+
+    def clang_getCompletionChunkText(self, cs_obj: CObjP, key: int) -> str:
+        ...
+
+    def clang_getCompletionPriority(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getCString(self, string: _CXString) -> str | None:
+        ...
+
+    def clang_getCursor(self, tu: TranslationUnit, loc: SourceLocation) -> Cursor:
+        ...
+
+    def clang_getCursorAvailability(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorDefinition(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorDisplayName(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCursorExceptionSpecificationType(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorExtent(self, cursor: Cursor) -> SourceRange:
+        ...
+
+    def clang_getCursorLexicalParent(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorLinkage(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorLocation(self, cursor: Cursor) -> SourceLocation:
+        ...
+
+    def clang_getCursorReferenced(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorResultType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getCursorSemanticParent(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorSpelling(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCursorTLSKind(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getCursorUSR(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getMangling(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCXXAccessSpecifier(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getDeclObjCTypeEncoding(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getDiagnostic(self, tu: TranslationUnit, key: int) -> CObjP:
+        ...
+
+    def clang_getDiagnosticCategory(self, diag: Diagnostic) -> int:
+        ...
+
+    def clang_getDiagnosticCategoryText(self, diag: Diagnostic) -> str:
+        ...
+
+    def clang_getDiagnosticFixIt(
+        self, diag: Diagnostic, key: int, sr: _CArgObject
+    ) -> str:
+        ...
+
+    def clang_getDiagnosticInSet(self, diag_set: CObjP, key: int) -> CObjP:
+        ...
+
+    def clang_getDiagnosticLocation(self, diag: Diagnostic) -> SourceLocation:
+        ...
+
+    def clang_getDiagnosticNumFixIts(self, diag: Diagnostic) -> c_uint:
+        ...
+
+    def clang_getDiagnosticNumRanges(self, diag: Diagnostic) -> c_uint:
+        ...
+
+    def clang_getDiagnosticOption(
+        self, diag: Diagnostic, disable: _CArgObject | None
+    ) -> str:
+        ...
+
+    def clang_getDiagnosticRange(self, diag: Diagnostic, key: int) -> SourceRange:
+        ...
+
+    def clang_getDiagnosticSeverity(self, diag: Diagnostic) -> int:
+        ...
+
+    def clang_getDiagnosticSpelling(self, diag: Diagnostic) -> str:
+        ...
+
+    def clang_getElementType(self, type: Type) -> Type:
+        ...
+
+    def clang_getEnumConstantDeclUnsignedValue(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getEnumConstantDeclValue(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getEnumDeclIntegerType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getExceptionSpecificationType(self, type: Type) -> int:
+        ...
+
+    def clang_getFile(self, tu: TranslationUnit, filename: str) -> CObjP:
+        ...
+
+    def clang_getFileName(self, file: File) -> str:
+        ...
+
+    def clang_getFileTime(self, file: File) -> int:
+        ...
+
+    def clang_getIncludedFile(self, cursor: Cursor) -> File:
+        ...
+
+    def clang_getInclusions(
+        self,
+        tu: TranslationUnit,
+        callback: _FuncPointer,
+        inclusions: list[FileInclusion],
+    ) -> None:
+        ...
+
+    def clang_getInstantiationLocation(
+        self,
+        loc: SourceLocation,
+        file: _CArgObject,
+        line: _CArgObject,
+        column: _CArgObject,
+        offset: _CArgObject,
+    ) -> None:
+        ...
+
+    def clang_getLocation(
+        self, tu: TranslationUnit, file: File, line: int, column: int
+    ) -> SourceLocation:
+        ...
+
+    def clang_getLocationForOffset(
+        self, tu: TranslationUnit, file: File, offset: int
+    ) -> SourceLocation:
+        ...
+
+    def clang_getNullCursor(self) -> Cursor:
+        ...
+
+    def clang_getNumArgTypes(self, type: Type) -> int:
+        ...
+
+    def clang_getNumCompletionChunks(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getNumDiagnostics(self, tu: TranslationUnit) -> c_uint:
+        ...
+
+    def clang_getNumDiagnosticsInSet(self, diag_set: CObjP) -> c_uint:
+        ...
+
+    def clang_getNumElements(self, type: Type) -> int:
+        ...
+
+    def clang_getPointeeType(self, type: Type) -> Type:
+        ...
+
+    def clang_getRange(self, start: SourceLocation, end: SourceLocation) -> SourceRange:
+        ...
+
+    def clang_getRangeEnd(self, range: SourceRange) -> SourceLocation:
+        ...
+
+    def clang_getRangeStart(self, range: SourceRange) -> SourceLocation:
+        ...
+
+    def clang_getResultType(self, type: Type) -> Type:
+        ...
+
+    def clang_getTokenExtent(self, tu: TranslationUnit, token: Token) -> SourceRange:
+        ...
+
+    def clang_getTokenKind(self, token: Token) -> int:
+        ...
+
+    def clang_getTokenLocation(
+        self, tu: TranslationUnit, token: Token
+    ) -> SourceLocation:
+        ...
+
+    def clang_getTokenSpelling(self, tu: TranslationUnit, token: Token) -> str:
+        ...
+
+    def clang_getTranslationUnitCursor(self, tu: TranslationUnit) -> Cursor:
+        ...
+
+    def clang_getTranslationUnitSpelling(self, tu: TranslationUnit) -> str:
+        ...
+
+    def clang_getTypeDeclaration(self, type: Type) -> Cursor:
+        ...
+
+    def clang_getTypedefDeclUnderlyingType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getTypedefName(self, type: Type) -> str:
+        ...
+
+    def clang_getTypeKindSpelling(self, kind: int) -> str:
+        ...
+
+    def clang_getTypeSpelling(self, type: Type) -> str:
+        ...
+
+    def clang_hashCursor(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_isAttribute(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isConstQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_isCursorDefinition(self, cursor: Cursor) -> bool:
+        ...
 
+    def clang_isDeclaration(self, kind: CursorKind) -> bool:
+        ...
 
-# Importing ABC-s directly from collections is deprecated since Python 3.7,
-# will stop working in Python 3.8.
-# See: https://docs.python.org/dev/whatsnew/3.7.html#id3
-if sys.version_info[:2] >= (3, 7):
-    from collections import abc as collections_abc
-else:
-    import collections as collections_abc
+    def clang_isExpression(self, kind: CursorKind) -> bool:
+        ...
 
-# We only support PathLike objects on Python version with os.fspath present
-# to be consistent with the Python standard library. On older Python versions
-# we only support strings and we have dummy fspath to just pass them through.
-try:
-    fspath = os.fspath
-except AttributeError:
+    def clang_isFunctionTypeVariadic(self, type: Type) -> bool:
+        ...
 
-    def fspath(x):
+    def clang_isInvalid(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isPODType(self, type: Type) -> bool:
+        ...
+
+    def clang_isPreprocessing(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isReference(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isRestrictQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_isStatement(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isTranslationUnit(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isUnexposed(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isVolatileQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_parseTranslationUnit(
+        self,
+        index: Index,
+        filename: str | None,
+        args_array: Array[c_char_p] | None,
+        num_args: int,
+        unsaved_files: Array[_CXUnsavedFile] | None,
+        num_unsaved: int,
+        options: int,
+    ) -> CObjP:
+        ...
+
+    def clang_reparseTranslationUnit(
+        self,
+        tu: TranslationUnit,
+        num_unsaved: int,
+        unsabed_files: Array[_CXUnsavedFile] | int,
+        options: int,
+    ) -> c_int:
+        ...
+
+    def clang_saveTranslationUnit(
+        self, tu: TranslationUnit, filename: str, options: c_uint
+    ) -> int:
+        ...
+
+    def clang_tokenize(
+        self,
+        tu: TranslationUnit,
+        range: SourceRange,
+        tokens_memory: _CArgObject,
+        tokens_count: _CArgObject,
+    ) -> None:
+        ...
+
+    def clang_visitChildren(
+        self, cursor: Cursor, callback: _FuncPointer, children: list[Cursor]
+    ) -> c_uint:
+        ...
+
+    def clang_Cursor_getNumArguments(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getArgument(self, cursor: Cursor, key: int) -> Cursor:
+        ...
+
+    def clang_Cursor_getNumTemplateArguments(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getStorageClass(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getTemplateArgumentKind(
+        self, cursor: Cursor, key: int
+    ) -> TemplateArgumentKind:
+        ...
+
+    def clang_Cursor_getTemplateArgumentType(self, cursor: Cursor, key: int) -> Type:
+        ...
+
+    def clang_Cursor_getTemplateArgumentValue(self, cursor: Cursor, key: int) -> int:
+        ...
+
+    def clang_Cursor_getTemplateArgumentUnsignedValue(
+        self, cursor: Cursor, key: int
+    ) -> int:
+        ...
+
+    def clang_Cursor_isAnonymous(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_Cursor_isBitField(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_Cursor_getBriefCommentText(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getRawCommentText(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getOffsetOfField(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Location_isInSystemHeader(self, loc: SourceLocation) -> bool:
+        ...
+
+    def clang_Type_getAlignOf(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getClassType(self, type: Type) -> Type:
+        ...
+
+    def clang_Type_getNumTemplateArguments(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getTemplateArgumentAsType(self, type: Type, key: int) -> Type:
+        ...
+
+    def clang_Type_getOffsetOf(self, type: Type, fieldname: str) -> int:
+        ...
+
+    def clang_Type_getSizeOf(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getCXXRefQualifier(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getNamedType(self, type: Type) -> Type:
+        ...
+
+    def clang_Type_visitFields(
+        self, type: Type, callback: _FuncPointer, fields: list[Cursor]
+    ) -> c_uint:
+        ...
+
+
+# Python 3 strings are unicode, translate them to/from utf8 for C-interop.
+class c_interop_string(c_char_p):
+    def __init__(self, p: str | bytes | None = None):
+        if p is None:
+            p = ""
+        if isinstance(p, str):
+            p = p.encode("utf8")
+        super(c_char_p, self).__init__(p)
+
+    def __str__(self) -> str:
+        return self.value or ""
+
+    @property
+    def value(self) -> st...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/78114


More information about the cfe-commits mailing list