[clang] nonblocking/nonallocating attributes: 2nd pass caller/callee analysis (PR #99656)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 23 12:54:06 PDT 2024
================
@@ -0,0 +1,1566 @@
+//=== SemaFunctionEffects.cpp - Sema handling of function effects ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Sema handling of function effects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Sema/SemaInternal.h"
+
+#define DEBUG_TYPE "effectanalysis"
+
+using namespace clang;
+
+namespace {
+
+// Identifies the kind of rule violation recorded in a Violation.
+enum class ViolationID : uint8_t {
+ None = 0, // Sentinel for an empty Violation.
+ // These first few map to a %select{} in a diagnostic.
+ // BaseDiagnosticIndex is an alias for the first of them (AllocatesMemory is
+ // defined equal to it); Violation::diagnosticSelectIndex() subtracts it from
+ // an ID to obtain a zero-based index.
+ // NOTE(review): the enumerator order presumably has to stay in sync with
+ // that %select{} -- confirm against the diagnostic definition.
+ BaseDiagnosticIndex,
+ AllocatesMemory = BaseDiagnosticIndex,
+ ThrowsOrCatchesExceptions,
+ HasStaticLocalVariable,
+ AccessesThreadLocalVariable,
+ AccessesObjCMethodOrProperty,
+
+ // These only apply to callees, where the analysis stops at the Decl.
+ DeclDisallowsInference,
+
+ // These both apply to indirect calls. The difference is that sometimes
+ // we have an actual Decl (generally a variable) which is the function
+ // pointer being called, and sometimes, typically due to a cast, we only
+ // have an expression.
+ CallsDeclWithoutEffect,
+ CallsExprWithoutEffect,
+};
+
+// Describes the AST context in which a violation was found, so that
+// diagnostics can point to the correct source. A site is a Kind plus, for
+// Kind::DefaultArgExpr, the default argument expression itself.
+class ViolationSite {
+public:
+  enum class Kind : uint8_t {
+    Default = 0, // Function body.
+    MemberInitializer = 1,
+    DefaultArgExpr = 2
+  };
+
+  ViolationSite() = default;
+
+  // Builds a Kind::DefaultArgExpr site referring to DefaultArg.
+  explicit ViolationSite(CXXDefaultArgExpr *DefaultArg)
+      : Storage(DefaultArg, Kind::DefaultArgExpr) {}
+
+  Kind kind() const { return static_cast<Kind>(Storage.getInt()); }
+
+  CXXDefaultArgExpr *defaultArgExpr() const { return Storage.getPointer(); }
+
+  // Changing the kind also resets the stored expression pointer to null.
+  void setKind(Kind K) { Storage.setPointerAndInt(nullptr, K); }
+
+private:
+  // The expression pointer and the Kind, packed into a single word.
+  llvm::PointerIntPair<CXXDefaultArgExpr *, 2, Kind> Storage;
+};
+
+// A single violation of an effect's rules. It may be kept for the entire
+// duration of the analysis phase so that it can be referred to when
+// explaining why a caller has been made unsafe by a callee. It can be
+// transformed into a Diagnostic (a warning or a note), depending on whether
+// the violation pertains to a function failing to be verified as holding an
+// effect vs. a function failing to be inferred as holding that effect.
+struct Violation {
+  FunctionEffect Effect;
+  // Only for certain IDs; can be None.
+  FunctionEffect CalleeEffectPreventingInference;
+  ViolationID ID = ViolationID::None;
+  ViolationSite Site;
+  SourceLocation Loc;
+  const Decl *Callee = nullptr; // Only valid for Calls*.
+
+  Violation() = default;
+
+  Violation(FunctionEffect Effect, ViolationID ID, ViolationSite VS,
+            SourceLocation Loc, const Decl *Callee = nullptr,
+            std::optional<FunctionEffect> CalleeEffect = std::nullopt)
+      : Effect(Effect), ID(ID), Site(VS), Loc(Loc), Callee(Callee) {
+    // When no callee effect is supplied, the member keeps its
+    // default-constructed value.
+    if (CalleeEffect.has_value())
+      CalleeEffectPreventingInference = *CalleeEffect;
+  }
+
+  // Offset of ID from ViolationID::BaseDiagnosticIndex.
+  unsigned diagnosticSelectIndex() const {
+    const auto Base = static_cast<unsigned>(ViolationID::BaseDiagnosticIndex);
+    return static_cast<unsigned>(ID) - Base;
+  }
+};
+
+// Tags a callable that is an operator new or operator delete function; None
+// for every other callable.
+enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete };
+// The kind of callable, as classified by CallableInfo's constructor.
+enum class CallableType : uint8_t {
+ // Unknown: probably function pointer
+ Unknown,
+ // A FunctionDecl that is not a virtual method.
+ Function,
+ // A CXXMethodDecl for which isVirtual() is true.
+ Virtual,
+ // A BlockDecl.
+ Block
+};
+
+// Tells whether the effects of FD CAN be verified. Whether they SHOULD be
+// verified is a separate question, not answered here.
+static bool functionIsVerifiable(const FunctionDecl *FD) {
+  // A trivial function is verifiable even without a body; otherwise
+  // `struct x { int a; };` would have an unverifiable default constructor.
+  return FD->isTrivial() || FD->hasBody();
+}
+
+// Returns true if FD is declared not to throw: it either carries a
+// NoThrowAttr, or it has a prototype for which isNothrow() is true.
+static bool isNoexcept(const FunctionDecl *FD) {
+  if (FD->hasAttr<NoThrowAttr>())
+    return true;
+  // Use getAs<> rather than castAs<>: a function declared without a
+  // prototype (e.g. `void f();` in C before C23) has no FunctionProtoType,
+  // and castAs<> asserts on it. Such a function has no exception
+  // specification to consult.
+  const auto *FPT = FD->getType()->getAs<FunctionProtoType>();
+  return FPT && FPT->isNothrow();
+}
+
+// Returns the set of effect kinds attributed to the builtin BuiltinID; the
+// set is empty for non-builtins and for builtins not listed below.
+// This list is probably incomplete.
+// FIXME: Investigate:
+// __builtin_eh_return?
+// __builtin_allow_runtime_check?
+// __builtin_unwind_init and other similar things that sound exception-related.
+// va_copy?
+// coroutines?
+static FunctionEffectKindSet getBuiltinFunctionEffects(unsigned BuiltinID) {
+  using EffectKind = FunctionEffect::Kind;
+  FunctionEffectKindSet Effects;
+
+  switch (BuiltinID) {
+  // These allocate/deallocate heap memory.
+  case Builtin::ID::BI__builtin_calloc:
+  case Builtin::ID::BI__builtin_malloc:
+  case Builtin::ID::BI__builtin_realloc:
+  case Builtin::ID::BI__builtin_free:
+  case Builtin::ID::BI__builtin_operator_delete:
+  case Builtin::ID::BI__builtin_operator_new:
+  case Builtin::ID::BIaligned_alloc:
+  case Builtin::ID::BIcalloc:
+  case Builtin::ID::BImalloc:
+  case Builtin::ID::BImemalign:
+  case Builtin::ID::BIrealloc:
+  case Builtin::ID::BIfree:
+
+  // Classified the same way as the allocation functions above.
+  case Builtin::ID::BIfopen:
+  case Builtin::ID::BIpthread_create:
+  case Builtin::ID::BI_Block_object_dispose:
+    Effects.insert(FunctionEffect(EffectKind::Allocating));
+    return Effects;
+
+  // These block in some other way than allocating memory.
+  case Builtin::ID::BIlongjmp:
+  case Builtin::ID::BI_longjmp:
+  case Builtin::ID::BIsiglongjmp:
+  case Builtin::ID::BI__builtin_longjmp:
+  case Builtin::ID::BIobjc_exception_throw:
+
+  // Objective-C runtime.
+  case Builtin::ID::BIobjc_msgSend:
+  case Builtin::ID::BIobjc_msgSend_fpret:
+  case Builtin::ID::BIobjc_msgSend_fp2ret:
+  case Builtin::ID::BIobjc_msgSend_stret:
+  case Builtin::ID::BIobjc_msgSendSuper:
+  case Builtin::ID::BIobjc_getClass:
+  case Builtin::ID::BIobjc_getMetaClass:
+  case Builtin::ID::BIobjc_enumerationMutation:
+  case Builtin::ID::BIobjc_assign_ivar:
+  case Builtin::ID::BIobjc_assign_global:
+  case Builtin::ID::BIobjc_sync_enter:
+  case Builtin::ID::BIobjc_sync_exit:
+  case Builtin::ID::BINSLog:
+  case Builtin::ID::BINSLogv:
+
+  // stdio.h
+  case Builtin::ID::BIfread:
+  case Builtin::ID::BIfwrite:
+
+  // stdio.h: printf family.
+  case Builtin::ID::BIprintf:
+  case Builtin::ID::BI__builtin_printf:
+  case Builtin::ID::BIfprintf:
+  case Builtin::ID::BIsnprintf:
+  case Builtin::ID::BIsprintf:
+  case Builtin::ID::BIvprintf:
+  case Builtin::ID::BIvfprintf:
+  case Builtin::ID::BIvsnprintf:
+  case Builtin::ID::BIvsprintf:
+
+  // stdio.h: scanf family.
+  case Builtin::ID::BIscanf:
+  case Builtin::ID::BIfscanf:
+  case Builtin::ID::BIsscanf:
+  case Builtin::ID::BIvscanf:
+  case Builtin::ID::BIvfscanf:
+  case Builtin::ID::BIvsscanf:
+    Effects.insert(FunctionEffect(EffectKind::Blocking));
+    return Effects;
+
+  case 0:  // Not builtin.
+  default: // By default, builtins have no known effects.
+    break;
+  }
+
+  // Nothing matched: report the empty set.
+  return Effects;
+}
+
+// Transitory, more extended information about a callable, which can be a
+// function, block, or function pointer. The constructor classifies the
+// declaration and caches its declared effects.
+struct CallableInfo {
+  // CDecl holds the function's definition, if any.
+  // FunctionDecl if CallableType::Function or Virtual
+  // BlockDecl if CallableType::Block
+  const Decl *CDecl;
+
+  // Remember whether the callable is a function, block, virtual method,
+  // or (presumed) function pointer.
+  CallableType CType = CallableType::Unknown;
+
+  // Remember whether the callable is an operator new or delete function,
+  // so that calls to them are reported more meaningfully, as memory
+  // allocations.
+  SpecialFuncType FuncType = SpecialFuncType::None;
+
+  // We inevitably want to know the callable's declared effects, so cache them.
+  FunctionEffectKindSet Effects;
+
+  // Classifies CD and caches its declared effects. For a FunctionDecl that
+  // has a definition, CDecl is redirected to that definition. FT is stored
+  // as-is in FuncType.
+  CallableInfo(const Decl &CD, SpecialFuncType FT = SpecialFuncType::None)
+      : CDecl(&CD), FuncType(FT) {
+    FunctionEffectsRef DeclEffects;
+    if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) {
+      // Use the function's definition, if any.
+      if (const FunctionDecl *Def = FD->getDefinition())
+        CDecl = FD = Def;
+      CType = CallableType::Function;
+      if (auto *Method = dyn_cast<CXXMethodDecl>(FD);
+          Method && Method->isVirtual())
+        CType = CallableType::Virtual;
+      DeclEffects = FD->getFunctionEffects();
+    } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) {
+      CType = CallableType::Block;
+      DeclEffects = BD->getFunctionEffects();
+    } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) {
+      // ValueDecl is function, enum, or variable, so just look at its type.
+      // CType stays CallableType::Unknown in this case.
+      DeclEffects = FunctionEffectsRef::get(VD->getType());
+    }
+    Effects = FunctionEffectKindSet(DeclEffects);
+  }
+
+  CallableType type() const { return CType; }
+
+  // True for non-virtual functions and blocks; false for virtual methods and
+  // callables of unknown kind.
+  bool isCalledDirectly() const {
+    return CType == CallableType::Function || CType == CallableType::Block;
+  }
+
+  // Whether the callable's effects can be verified: never for unknown or
+  // virtual callables, always for blocks, and as decided by
+  // functionIsVerifiable() for plain functions.
+  bool isVerifiable() const {
+    switch (CType) {
+    case CallableType::Unknown:
+    case CallableType::Virtual:
+      return false;
+    case CallableType::Block:
+      return true;
+    case CallableType::Function:
+      // The constructor only sets CallableType::Function when CDecl is a
+      // FunctionDecl, so use cast<>, which asserts that invariant, instead of
+      // dyn_cast<>, whose null result would be dereferenced unchecked by
+      // functionIsVerifiable().
+      return functionIsVerifiable(cast<FunctionDecl>(CDecl));
+    }
+    llvm_unreachable("undefined CallableType");
+  }
+
+  /// Generate a name for logging and diagnostics.
+  /// Functions get their qualified diagnostic name, blocks get
+  /// "(block N)" with their mangling number, and any other NamedDecl gets its
+  /// qualified name. The result is empty for any other kind of Decl.
+  std::string name(Sema &S) const {
+    std::string Name;
+    llvm::raw_string_ostream OS(Name);
+
+    if (auto *FD = dyn_cast<FunctionDecl>(CDecl))
+      FD->getNameForDiagnostic(OS, S.getPrintingPolicy(),
+                               /*Qualified=*/true);
+    else if (auto *BD = dyn_cast<BlockDecl>(CDecl))
+      OS << "(block " << BD->getBlockManglingNumber() << ")";
+    else if (auto *ND = dyn_cast<NamedDecl>(CDecl))
+      ND->printQualifiedName(OS);
+    return Name;
+  }
+};
+
+// ----------
+// Map effects to single Violations, to hold the first (of potentially many)
+// violations pertaining to an effect, per function.
+class EffectToViolationMap {
+ // Since we currently only have a tiny number of effects (typically no more
+ // than 1), use a SmallVector with an inline capacity of 1. Since it
+ // is often empty, use a unique_ptr to the SmallVector.
+ // Note that Violation itself contains a FunctionEffect which is the key.
+ using ImplVec = llvm::SmallVector<Violation, 1>;
+ std::unique_ptr<ImplVec> Impl;
----------------
Sirraide wrote:
Because I don’t think we have anything for ‘0 or 1 of something’, do we? This feels like a `TinyPtrVector` situation, but `Violation` isn’t exactly a pointer...
https://github.com/llvm/llvm-project/pull/99656
More information about the cfe-commits
mailing list