[clang] [clang-bindings] Add strict typing to clang Python bindings (#76664) (PR #78114)

Jannick Kremer via cfe-commits cfe-commits at lists.llvm.org
Sun Jan 14 13:59:21 PST 2024


================
@@ -67,89 +67,690 @@
 import clang.enumerations
 
 import os
-import sys
-
-if sys.version_info[0] == 3:
-    # Python 3 strings are unicode, translate them to/from utf8 for C-interop.
-    class c_interop_string(c_char_p):
-        def __init__(self, p=None):
-            if p is None:
-                p = ""
-            if isinstance(p, str):
-                p = p.encode("utf8")
-            super(c_char_p, self).__init__(p)
-
-        def __str__(self):
-            return self.value
-
-        @property
-        def value(self):
-            if super(c_char_p, self).value is None:
-                return None
-            return super(c_char_p, self).value.decode("utf8")
-
-        @classmethod
-        def from_param(cls, param):
-            if isinstance(param, str):
-                return cls(param)
-            if isinstance(param, bytes):
-                return cls(param)
-            if param is None:
-                # Support passing null to C functions expecting char arrays
-                return None
-            raise TypeError(
-                "Cannot convert '{}' to '{}'".format(type(param).__name__, cls.__name__)
-            )
+from enum import Enum
+
+from typing import (
+    Any,
+    Callable,
+    cast as Tcast,
+    Generic,
+    Iterable,
+    Iterator,
+    Optional,
+    Sequence,
+    Type as TType,
+    TypeVar,
+    TYPE_CHECKING,
+    Union as TUnion,
+)
+from typing_extensions import Protocol, TypeAlias
+
+if TYPE_CHECKING:
+    from ctypes import _Pointer, _FuncPointer, _CArgObject
+    from io import TextIOWrapper
+
+    StrPath: TypeAlias = TUnion[str, os.PathLike[str]]
+    InMemoryFile: TypeAlias = (
+        "tuple[TUnion[str, os.PathLike[Any]], TUnion[str, TextIOWrapper]]"
+    )
+    LibFunc: TypeAlias = TUnion[
+        "tuple[str, Optional[list[Any]]]",
+        "tuple[str, Optional[list[Any]], Any]",
+        "tuple[str, Optional[list[Any]], Any, Callable[..., Any]]",
+    ]
+    CObjP: TypeAlias = _Pointer[Any]
 
-        @staticmethod
-        def to_python_string(x, *args):
-            return x.value
+    TSeq = TypeVar("TSeq", covariant=True)
 
-    def b(x):
-        if isinstance(x, bytes):
-            return x
-        return x.encode("utf8")
+    class NoSliceSequence(Protocol[TSeq]):
+        def __len__(self) -> int:
+            ...
 
-elif sys.version_info[0] == 2:
-    # Python 2 strings are utf8 byte strings, no translation is needed for
-    # C-interop.
-    c_interop_string = c_char_p
+        def __getitem__(self, key: int) -> TSeq:
+            ...
 
-    def _to_python_string(x, *args):
-        return x
 
-    c_interop_string.to_python_string = staticmethod(_to_python_string)
+class ClangLib(Protocol):
+    def clang_annotateTokens(
+        self, tu: TranslationUnit, token: _CArgObject, num: int, cursor: _CArgObject
+    ) -> None:
+        ...
 
-    def b(x):
-        return x
+    def clang_CompilationDatabase_dispose(self, cdb: CompilationDatabase) -> None:
+        ...
+
+    def clang_CompilationDatabase_fromDirectory(
+        self, buildDir: str, errorCode: _CArgObject
+    ) -> CompilationDatabase:
+        ...
+
+    def clang_CompilationDatabase_getAllCompileCommands(
+        self, cdb: CompilationDatabase
+    ) -> CompileCommands:
+        ...
+
+    def clang_CompilationDatabase_getCompileCommands(
+        self, cdb: CompilationDatabase, filename: str
+    ) -> CompileCommands:
+        ...
+
+    def clang_CompileCommands_dispose(self, ccmds: CObjP) -> None:
+        ...
+
+    def clang_CompileCommands_getCommand(self, ccmds: CObjP, key: int) -> CObjP:
+        ...
+
+    def clang_CompileCommands_getSize(self, ccmds: CObjP) -> c_uint:
+        ...
+
+    def clang_CompileCommand_getArg(self, cmd: CObjP, key: int) -> str:
+        ...
+
+    def clang_CompileCommand_getDirectory(self, cmd: CObjP) -> str:
+        ...
+
+    def clang_CompileCommand_getFilename(self, cmd: CObjP) -> str:
+        ...
+
+    def clang_CompileCommand_getNumArgs(self, cmd: CObjP) -> int:
+        ...
+
+    def clang_codeCompleteAt(
+        self,
+        tu: TranslationUnit,
+        filename: str,
+        line: int,
+        column: int,
+        unsaved_files: TUnion[int, Array[_CXUnsavedFile]],
+        num_unsaved_files: int,
+        options: int,
+    ) -> _Pointer[CCRStructure]:
+        ...
+
+    def clang_codeCompleteGetDiagnostic(
+        self, ccrs: CodeCompletionResults, key: int
+    ) -> Diagnostic:
+        ...
+
+    def clang_codeCompleteGetNumDiagnostics(self, ccrs: CodeCompletionResults) -> c_int:
+        ...
+
+    def clang_createIndex(self, excludeDecls: int, displayDiagnostics: int) -> CObjP:
+        ...
+
+    def clang_createTranslationUnit(self, index: Index, filename: str) -> CObjP:
+        ...
+
+    def clang_CXXConstructor_isConvertingConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isCopyConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isDefaultConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXConstructor_isMoveConstructor(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXField_isMutable(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isConst(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isDefaulted(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isDeleted(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isCopyAssignmentOperator(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isMoveAssignmentOperator(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isExplicit(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isPureVirtual(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isStatic(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXMethod_isVirtual(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_CXXRecord_isAbstract(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_EnumDecl_isScoped(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_defaultDiagnosticDisplayOptions(self) -> int:
+        ...
+
+    def clang_defaultSaveOptions(self, tu: TranslationUnit) -> c_uint:
+        ...
+
+    def clang_disposeCodeCompleteResults(self, tu: CodeCompletionResults) -> None:
+        ...
+
+    def clang_disposeDiagnostic(self, diag: Diagnostic) -> None:
+        ...
+
+    def clang_disposeIndex(self, index: Index) -> None:
+        ...
+
+    def clang_disposeString(self, string: _CXString) -> None:
+        ...
+
+    def clang_disposeTokens(
+        self, tu: TranslationUnit, tokens: _Pointer[Token], count: c_uint
+    ) -> None:
+        ...
+
+    def clang_disposeTranslationUnit(self, tu: TranslationUnit) -> None:
+        ...
+
+    def clang_equalCursors(self, self_c: Cursor, other_c: Cursor) -> bool:
+        ...
+
+    def clang_equalLocations(
+        self, self_loc: SourceLocation, other_loc: SourceLocation
+    ) -> bool:
+        ...
+
+    def clang_equalRanges(self, self_r: SourceRange, other_r: SourceRange) -> bool:
+        ...
+
+    def clang_equalTypes(self, self_t: Type, other_t: Type) -> bool:
+        ...
+
+    def clang_formatDiagnostic(self, diag: Diagnostic, options: int) -> str:
+        ...
+
+    def clang_getAddressSpace(self, type: Type) -> int:
+        ...
+
+    def clang_getArgType(self, parent: Type, key: int) -> Type:
+        ...
+
+    def clang_getArrayElementType(self, type: Type) -> Type:
+        ...
+
+    def clang_getArraySize(self, type: Type) -> int:
+        ...
+
+    def clang_getFieldDeclBitWidth(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCanonicalCursor(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCanonicalType(self, type: Type) -> Type:
+        ...
+
+    def clang_getChildDiagnostics(self, diag: Diagnostic) -> CObjP:
+        ...
+
+    def clang_getCompletionAvailability(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getCompletionBriefComment(self, cs_obj: CObjP) -> str:
+        ...
+
+    def clang_getCompletionChunkCompletionString(
+        self, cs_obj: CObjP, key: int
+    ) -> CObjP:
+        ...
+
+    def clang_getCompletionChunkKind(self, cs_obj: CObjP, key: int) -> int:
+        ...
+
+    def clang_getCompletionChunkText(self, cs_obj: CObjP, key: int) -> str:
+        ...
+
+    def clang_getCompletionPriority(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getCString(self, string: _CXString) -> str | None:
+        ...
+
+    def clang_getCursor(self, tu: TranslationUnit, loc: SourceLocation) -> Cursor:
+        ...
+
+    def clang_getCursorAvailability(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorDefinition(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorDisplayName(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCursorExceptionSpecificationType(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorExtent(self, cursor: Cursor) -> SourceRange:
+        ...
+
+    def clang_getCursorLexicalParent(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorLinkage(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorLocation(self, cursor: Cursor) -> SourceLocation:
+        ...
+
+    def clang_getCursorReferenced(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorResultType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getCursorSemanticParent(self, cursor: Cursor) -> Cursor:
+        ...
+
+    def clang_getCursorSpelling(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCursorTLSKind(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getCursorType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getCursorUSR(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getMangling(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getCXXAccessSpecifier(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getDeclObjCTypeEncoding(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_getDiagnostic(self, tu: TranslationUnit, key: int) -> CObjP:
+        ...
+
+    def clang_getDiagnosticCategory(self, diag: Diagnostic) -> int:
+        ...
+
+    def clang_getDiagnosticCategoryText(self, diag: Diagnostic) -> str:
+        ...
+
+    def clang_getDiagnosticFixIt(
+        self, diag: Diagnostic, key: int, sr: _CArgObject
+    ) -> str:
+        ...
+
+    def clang_getDiagnosticInSet(self, diag_set: CObjP, key: int) -> CObjP:
+        ...
+
+    def clang_getDiagnosticLocation(self, diag: Diagnostic) -> SourceLocation:
+        ...
+
+    def clang_getDiagnosticNumFixIts(self, diag: Diagnostic) -> c_uint:
+        ...
+
+    def clang_getDiagnosticNumRanges(self, diag: Diagnostic) -> c_uint:
+        ...
+
+    def clang_getDiagnosticOption(
+        self, diag: Diagnostic, disable: _CArgObject | None
+    ) -> str:
+        ...
+
+    def clang_getDiagnosticRange(self, diag: Diagnostic, key: int) -> SourceRange:
+        ...
+
+    def clang_getDiagnosticSeverity(self, diag: Diagnostic) -> int:
+        ...
+
+    def clang_getDiagnosticSpelling(self, diag: Diagnostic) -> str:
+        ...
+
+    def clang_getElementType(self, type: Type) -> Type:
+        ...
+
+    def clang_getEnumConstantDeclUnsignedValue(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getEnumConstantDeclValue(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_getEnumDeclIntegerType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getExceptionSpecificationType(self, type: Type) -> int:
+        ...
+
+    def clang_getFile(self, tu: TranslationUnit, filename: str) -> CObjP:
+        ...
+
+    def clang_getFileName(self, file: File) -> str:
+        ...
+
+    def clang_getFileTime(self, file: File) -> int:
+        ...
+
+    def clang_getIncludedFile(self, cursor: Cursor) -> File:
+        ...
+
+    def clang_getInclusions(
+        self,
+        tu: TranslationUnit,
+        callback: _FuncPointer,
+        inclusions: list[FileInclusion],
+    ) -> None:
+        ...
+
+    def clang_getInstantiationLocation(
+        self,
+        loc: SourceLocation,
+        file: _CArgObject,
+        line: _CArgObject,
+        column: _CArgObject,
+        offset: _CArgObject,
+    ) -> None:
+        ...
+
+    def clang_getLocation(
+        self, tu: TranslationUnit, file: File, line: int, column: int
+    ) -> SourceLocation:
+        ...
+
+    def clang_getLocationForOffset(
+        self, tu: TranslationUnit, file: File, offset: int
+    ) -> SourceLocation:
+        ...
+
+    def clang_getNullCursor(self) -> Cursor:
+        ...
+
+    def clang_getNumArgTypes(self, type: Type) -> int:
+        ...
+
+    def clang_getNumCompletionChunks(self, cs_obj: CObjP) -> int:
+        ...
+
+    def clang_getNumDiagnostics(self, tu: TranslationUnit) -> c_uint:
+        ...
+
+    def clang_getNumDiagnosticsInSet(self, diag_set: CObjP) -> c_uint:
+        ...
+
+    def clang_getNumElements(self, type: Type) -> int:
+        ...
+
+    def clang_getPointeeType(self, type: Type) -> Type:
+        ...
+
+    def clang_getRange(self, start: SourceLocation, end: SourceLocation) -> SourceRange:
+        ...
+
+    def clang_getRangeEnd(self, range: SourceRange) -> SourceLocation:
+        ...
+
+    def clang_getRangeStart(self, range: SourceRange) -> SourceLocation:
+        ...
+
+    def clang_getResultType(self, type: Type) -> Type:
+        ...
+
+    def clang_getTokenExtent(self, tu: TranslationUnit, token: Token) -> SourceRange:
+        ...
+
+    def clang_getTokenKind(self, token: Token) -> int:
+        ...
+
+    def clang_getTokenLocation(
+        self, tu: TranslationUnit, token: Token
+    ) -> SourceLocation:
+        ...
+
+    def clang_getTokenSpelling(self, tu: TranslationUnit, token: Token) -> str:
+        ...
+
+    def clang_getTranslationUnitCursor(self, tu: TranslationUnit) -> Cursor:
+        ...
+
+    def clang_getTranslationUnitSpelling(self, tu: TranslationUnit) -> str:
+        ...
+
+    def clang_getTypeDeclaration(self, type: Type) -> Cursor:
+        ...
+
+    def clang_getTypedefDeclUnderlyingType(self, cursor: Cursor) -> Type:
+        ...
+
+    def clang_getTypedefName(self, type: Type) -> str:
+        ...
+
+    def clang_getTypeKindSpelling(self, kind: int) -> str:
+        ...
+
+    def clang_getTypeSpelling(self, type: Type) -> str:
+        ...
+
+    def clang_hashCursor(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_isAttribute(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isConstQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_isCursorDefinition(self, cursor: Cursor) -> bool:
+        ...
 
+    def clang_isDeclaration(self, kind: CursorKind) -> bool:
+        ...
 
-# Importing ABC-s directly from collections is deprecated since Python 3.7,
-# will stop working in Python 3.8.
-# See: https://docs.python.org/dev/whatsnew/3.7.html#id3
-if sys.version_info[:2] >= (3, 7):
-    from collections import abc as collections_abc
-else:
-    import collections as collections_abc
+    def clang_isExpression(self, kind: CursorKind) -> bool:
+        ...
 
-# We only support PathLike objects on Python version with os.fspath present
-# to be consistent with the Python standard library. On older Python versions
-# we only support strings and we have dummy fspath to just pass them through.
-try:
-    fspath = os.fspath
-except AttributeError:
+    def clang_isFunctionTypeVariadic(self, type: Type) -> bool:
+        ...
 
-    def fspath(x):
+    def clang_isInvalid(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isPODType(self, type: Type) -> bool:
+        ...
+
+    def clang_isPreprocessing(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isReference(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isRestrictQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_isStatement(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isTranslationUnit(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isUnexposed(self, kind: CursorKind) -> bool:
+        ...
+
+    def clang_isVolatileQualifiedType(self, type: Type) -> bool:
+        ...
+
+    def clang_parseTranslationUnit(
+        self,
+        index: Index,
+        filename: str | None,
+        args_array: Array[c_char_p] | None,
+        num_args: int,
+        unsaved_files: Array[_CXUnsavedFile] | None,
+        num_unsaved: int,
+        options: int,
+    ) -> CObjP:
+        ...
+
+    def clang_reparseTranslationUnit(
+        self,
+        tu: TranslationUnit,
+        num_unsaved: int,
+        unsabed_files: Array[_CXUnsavedFile] | int,
+        options: int,
+    ) -> c_int:
+        ...
+
+    def clang_saveTranslationUnit(
+        self, tu: TranslationUnit, filename: str, options: c_uint
+    ) -> int:
+        ...
+
+    def clang_tokenize(
+        self,
+        tu: TranslationUnit,
+        range: SourceRange,
+        tokens_memory: _CArgObject,
+        tokens_count: _CArgObject,
+    ) -> None:
+        ...
+
+    def clang_visitChildren(
+        self, cursor: Cursor, callback: _FuncPointer, children: list[Cursor]
+    ) -> c_uint:
+        ...
+
+    def clang_Cursor_getNumArguments(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getArgument(self, cursor: Cursor, key: int) -> Cursor:
+        ...
+
+    def clang_Cursor_getNumTemplateArguments(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getStorageClass(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Cursor_getTemplateArgumentKind(
+        self, cursor: Cursor, key: int
+    ) -> TemplateArgumentKind:
+        ...
+
+    def clang_Cursor_getTemplateArgumentType(self, cursor: Cursor, key: int) -> Type:
+        ...
+
+    def clang_Cursor_getTemplateArgumentValue(self, cursor: Cursor, key: int) -> int:
+        ...
+
+    def clang_Cursor_getTemplateArgumentUnsignedValue(
+        self, cursor: Cursor, key: int
+    ) -> int:
+        ...
+
+    def clang_Cursor_isAnonymous(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_Cursor_isBitField(self, cursor: Cursor) -> bool:
+        ...
+
+    def clang_Cursor_getBriefCommentText(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getRawCommentText(self, cursor: Cursor) -> str:
+        ...
+
+    def clang_Cursor_getOffsetOfField(self, cursor: Cursor) -> int:
+        ...
+
+    def clang_Location_isInSystemHeader(self, loc: SourceLocation) -> bool:
+        ...
+
+    def clang_Type_getAlignOf(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getClassType(self, type: Type) -> Type:
+        ...
+
+    def clang_Type_getNumTemplateArguments(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getTemplateArgumentAsType(self, type: Type, key: int) -> Type:
+        ...
+
+    def clang_Type_getOffsetOf(self, type: Type, fieldname: str) -> int:
+        ...
+
+    def clang_Type_getSizeOf(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getCXXRefQualifier(self, type: Type) -> int:
+        ...
+
+    def clang_Type_getNamedType(self, type: Type) -> Type:
+        ...
+
+    def clang_Type_visitFields(
+        self, type: Type, callback: _FuncPointer, fields: list[Cursor]
+    ) -> c_uint:
+        ...
+
+
+# Python 3 strings are unicode, translate them to/from utf8 for C-interop.
+class c_interop_string(c_char_p):
+    def __init__(self, p: str | bytes | None = None):
+        if p is None:
+            p = ""
+        if isinstance(p, str):
+            p = p.encode("utf8")
+        super(c_char_p, self).__init__(p)
+
+    def __str__(self) -> str:
+        return self.value or ""
+
+    @property
+    def value(self) -> str | None:  # type: ignore
+        val = super(c_char_p, self).value
+        if val is None:
+            return None
+        return val.decode("utf8")
+
+    @classmethod
+    def from_param(cls, param: str | bytes | None) -> c_interop_string:
+        if isinstance(param, str):
+            return cls(param)
+        if isinstance(param, bytes):
+            return cls(param)
+        if param is None:
+            # Support passing null to C functions expecting char arrays
+            return cls(param)
+        raise TypeError(
+            "Cannot convert '{}' to '{}'".format(type(param).__name__, cls.__name__)
+        )
+
+    @staticmethod
+    def to_python_string(x: c_interop_string, *args: Any) -> str | None:
+        return x.value
+
+
+def b(x: str | bytes) -> bytes:
+    if isinstance(x, bytes):
         return x
+    return x.encode("utf8")
 
 
 # ctypes doesn't implicitly convert c_void_p to the appropriate wrapper
 # object. This is a problem, because it means that from_parameter will see an
 # integer and pass the wrong value on platforms where int != void*. Work around
 # this by marshalling object arguments as void**.
-c_object_p = POINTER(c_void_p)
-
-callbacks = {}
----------------
DeinAlptraum wrote:

`callbacks` could have been typed as a `TypedDict`, but `TypedDict` definitions are always verbose, and I don't see a reason to keep this dict in the first place, so I removed it.

https://github.com/llvm/llvm-project/pull/78114


More information about the cfe-commits mailing list