[clang] nonblocking/nonallocating attributes: 2nd pass caller/callee analysis (PR #99656)

Mon Sep 23 15:17:52 PDT 2024

================
@@ -0,0 +1,1572 @@
+//=== SemaFunctionEffects.cpp - Sema handling of function effects ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Sema handling of function effects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+#include "clang/AST/Type.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Sema/SemaInternal.h"
+
+#define DEBUG_TYPE "effectanalysis"
+
+using namespace clang;
+
+namespace {
+
+enum class ViolationID : uint8_t {
+  None = 0, // Sentinel for an empty Violation.
+  // These first few map to a %select{} in a diagnostic.
+  BaseDiagnosticIndex,
+  AllocatesMemory = BaseDiagnosticIndex,
+  ThrowsOrCatchesExceptions,
+  HasStaticLocalVariable,
+  AccessesThreadLocalVariable,
+  AccessesObjCMethodOrProperty,
+
+  // These only apply to callees, where the analysis stops at the Decl.
+  DeclDisallowsInference,
+
+  // These both apply to indirect calls. The difference is that sometimes
+  // we have an actual Decl (generally a variable) which is the function
+  // pointer being called, and sometimes, typically due to a cast, we only
+  // have an expression.
+  CallsDeclWithoutEffect,
+  CallsExprWithoutEffect,
+};
+
+// Information about the AST context in which a violation was found, so
+// that diagnostics can point to the correct source.
+class ViolationSite {
+public:
+  enum class Kind : uint8_t {
+    Default = 0, // Function body.
+    MemberInitializer = 1,
+    DefaultArgExpr = 2
+  };
+
+private:
+  llvm::PointerIntPair<CXXDefaultArgExpr *, 2, Kind> Impl;
+
+public:
+  ViolationSite() = default;
+
+  explicit ViolationSite(CXXDefaultArgExpr *E)
+      : Impl(E, Kind::DefaultArgExpr) {}
+
+  Kind kind() const { return static_cast<Kind>(Impl.getInt()); }
+  CXXDefaultArgExpr *defaultArgExpr() const { return Impl.getPointer(); }
+
+  void setKind(Kind K) { Impl.setPointerAndInt(nullptr, K); }
+};
+
+// Represents a violation of the rules, potentially for the entire duration of
+// the analysis phase, in order to refer to it when explaining why a caller has
+// been made unsafe by a callee. Can be transformed into either a Diagnostic
+// (warning or a note), depending on whether the violation pertains to a
+// function failing to be verifed as holding an effect vs. a function failing to
+// be inferred as holding that effect.
+struct Violation {
+  FunctionEffect Effect;
+  FunctionEffect
+      CalleeEffectPreventingInference; // Only for certain IDs; can be None.
+  ViolationID ID = ViolationID::None;
+  ViolationSite Site;
+  SourceLocation Loc;
+  const Decl *Callee = nullptr; // Only valid for Calls*.
+
+  Violation() = default;
+
+  Violation(FunctionEffect Effect, ViolationID ID, ViolationSite VS,
+            SourceLocation Loc, const Decl *Callee = nullptr,
+            std::optional<FunctionEffect> CalleeEffect = std::nullopt)
+      : Effect(Effect), CalleeEffectPreventingInference(
+                            CalleeEffect.value_or(FunctionEffect())),
+        ID(ID), Site(VS), Loc(Loc), Callee(Callee) {}
+
+  unsigned diagnosticSelectIndex() const {
+    return unsigned(ID) - unsigned(ViolationID::BaseDiagnosticIndex);
+  }
+};
+
+enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete };
+enum class CallableType : uint8_t {
+  // Unknown: probably function pointer.
+  Unknown,
+  Function,
+  Virtual,
+  Block
+};
+
+// Return whether a function's effects CAN be verified.
+// The question of whether it SHOULD be verified is independent.
+static bool functionIsVerifiable(const FunctionDecl *FD) {
+  if (FD->isTrivial()) {
+    // Otherwise `struct x { int a; };` would have an unverifiable default
+    // constructor.
+    return true;
+  }
+  return FD->hasBody();
+}
+
+static bool isNoexcept(const FunctionDecl *FD) {
+  const auto *FPT = FD->getType()->castAs<FunctionProtoType>();
+  if (FPT->isNothrow() || FD->hasAttr<NoThrowAttr>())
+    return true;
+  return false;
+}
+
+// This list is probably incomplete.
+// FIXME: Investigate:
+// __builtin_eh_return?
+// __builtin_allow_runtime_check?
+// __builtin_unwind_init and other similar things that sound exception-related.
+// va_copy?
+// coroutines?
+static FunctionEffectKindSet getBuiltinFunctionEffects(unsigned BuiltinID) {
+  FunctionEffectKindSet Result;
+
+  switch (BuiltinID) {
+  case 0:  // Not builtin.
+  default: // By default, builtins have no known effects.
+    break;
+
+  // These allocate/deallocate heap memory.
+  case Builtin::ID::BI__builtin_calloc:
+  case Builtin::ID::BI__builtin_malloc:
+  case Builtin::ID::BI__builtin_realloc:
+  case Builtin::ID::BI__builtin_free:
+  case Builtin::ID::BI__builtin_operator_delete:
+  case Builtin::ID::BI__builtin_operator_new:
+  case Builtin::ID::BIaligned_alloc:
+  case Builtin::ID::BIcalloc:
+  case Builtin::ID::BImalloc:
+  case Builtin::ID::BImemalign:
+  case Builtin::ID::BIrealloc:
+  case Builtin::ID::BIfree:
+
+  case Builtin::ID::BIfopen:
+  case Builtin::ID::BIpthread_create:
+  case Builtin::ID::BI_Block_object_dispose:
+    Result.insert(FunctionEffect(FunctionEffect::Kind::Allocating));
+    break;
+
+  // These block in some other way than allocating memory.
+  case Builtin::ID::BIlongjmp:
+  case Builtin::ID::BI_longjmp:
+  case Builtin::ID::BIsiglongjmp:
+  case Builtin::ID::BI__builtin_longjmp:
+  case Builtin::ID::BIobjc_exception_throw:
+
+  // Objective-C runtime.
+  case Builtin::ID::BIobjc_msgSend:
+  case Builtin::ID::BIobjc_msgSend_fpret:
+  case Builtin::ID::BIobjc_msgSend_fp2ret:
+  case Builtin::ID::BIobjc_msgSend_stret:
+  case Builtin::ID::BIobjc_msgSendSuper:
+  case Builtin::ID::BIobjc_getClass:
+  case Builtin::ID::BIobjc_getMetaClass:
+  case Builtin::ID::BIobjc_enumerationMutation:
+  case Builtin::ID::BIobjc_assign_ivar:
+  case Builtin::ID::BIobjc_assign_global:
+  case Builtin::ID::BIobjc_sync_enter:
+  case Builtin::ID::BIobjc_sync_exit:
+  case Builtin::ID::BINSLog:
+  case Builtin::ID::BINSLogv:
+
+  // stdio.h
+  case Builtin::ID::BIfread:
+  case Builtin::ID::BIfwrite:
+
+  // stdio.h: printf family.
+  case Builtin::ID::BIprintf:
+  case Builtin::ID::BI__builtin_printf:
+  case Builtin::ID::BIfprintf:
+  case Builtin::ID::BIsnprintf:
+  case Builtin::ID::BIsprintf:
+  case Builtin::ID::BIvprintf:
+  case Builtin::ID::BIvfprintf:
+  case Builtin::ID::BIvsnprintf:
+  case Builtin::ID::BIvsprintf:
+
+  // stdio.h: scanf family.
+  case Builtin::ID::BIscanf:
+  case Builtin::ID::BIfscanf:
+  case Builtin::ID::BIsscanf:
+  case Builtin::ID::BIvscanf:
+  case Builtin::ID::BIvfscanf:
+  case Builtin::ID::BIvsscanf:
+
+    Result.insert(FunctionEffect(FunctionEffect::Kind::Blocking));
+    break;
+  }
+
+  return Result;
+}
+
+// Transitory, more extended information about a callable, which can be a
+// function, block, or function pointer.
+struct CallableInfo {
+  // CDecl holds the function's definition, if any.
+  // FunctionDecl if CallableType::Function or Virtual
+  // BlockDecl if CallableType::Block
+  const Decl *CDecl;
+
+  // Remember whether the callable is a function, block, virtual method,
+  // or (presumed) function pointer.
+  CallableType CType = CallableType::Unknown;
+
+  // Remember whether the callable is an operator new or delete function,
+  // so that calls to them are reported more meaningfully, as memory
+  // allocations.
+  SpecialFuncType FuncType = SpecialFuncType::None;
+
+  // We inevitably want to know the callable's declared effects, so cache them.
+  FunctionEffectKindSet Effects;
+
+  CallableInfo(const Decl &CD, SpecialFuncType FT = SpecialFuncType::None)
+      : CDecl(&CD), FuncType(FT) {
+    FunctionEffectsRef DeclEffects;
+    if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) {
+      // Use the function's definition, if any.
+      if (const FunctionDecl *Def = FD->getDefinition())
+        CDecl = FD = Def;
+      CType = CallableType::Function;
+      if (auto *Method = dyn_cast<CXXMethodDecl>(FD);
+          Method && Method->isVirtual())
+        CType = CallableType::Virtual;
+      DeclEffects = FD->getFunctionEffects();
+    } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) {
+      CType = CallableType::Block;
+      DeclEffects = BD->getFunctionEffects();
+    } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) {
+      // ValueDecl is function, enum, or variable, so just look at its type.
+      DeclEffects = FunctionEffectsRef::get(VD->getType());
+    }
+    Effects = FunctionEffectKindSet(DeclEffects);
+  }
+
+  CallableType type() const { return CType; }
+
+  bool isCalledDirectly() const {
+    return CType == CallableType::Function || CType == CallableType::Block;
+  }
+
+  bool isVerifiable() const {
+    switch (CType) {
+    case CallableType::Unknown:
+    case CallableType::Virtual:
+      return false;
+    case CallableType::Block:
+      return true;
+    case CallableType::Function:
+      return functionIsVerifiable(dyn_cast<FunctionDecl>(CDecl));
+    }
+    llvm_unreachable("undefined CallableType");
+  }
+
+  /// Generate a name for logging and diagnostics.
+  std::string name(Sema &S) const {
+    std::string Name;
+    llvm::raw_string_ostream OS(Name);
+
+    if (auto *FD = dyn_cast<FunctionDecl>(CDecl))
+      FD->getNameForDiagnostic(OS, S.getPrintingPolicy(),
+                               /*Qualified=*/true);
+    else if (auto *BD = dyn_cast<BlockDecl>(CDecl))
+      OS << "(block " << BD->getBlockManglingNumber() << ")";
+    else if (auto *VD = dyn_cast<NamedDecl>(CDecl))
+      VD->printQualifiedName(OS);
+    return Name;
+  }
+};
+
+// ----------
+// Map effects to single Violations, to hold the first (of potentially many)
+// violations pertaining to an effect, per function.
+class EffectToViolationMap {
+  // Since we currently only have a tiny number of effects (typically no more
+  // than 1), use a SmallVector with an inline capacity of 1. Since it
+  // is often empty, use a unique_ptr to the SmallVector.
+  // Note that Violation itself contains a FunctionEffect which is the key.
+  using ImplVec = llvm::SmallVector<Violation, 1>;
+  std::unique_ptr<ImplVec> Impl;
+
+public:
+  // Insert a new Violation if we do not already have one for its effect.
+  void maybeInsert(const Violation &Viol) {
+    if (Impl == nullptr)
+      Impl = std::make_unique<ImplVec>();
+    else if (lookup(Viol.Effect) != nullptr)
+      return;
+
+    Impl->push_back(Viol);
+  }
+
+  const Violation *lookup(FunctionEffect Key) {
+    if (Impl == nullptr)
+      return nullptr;
+
+    auto *Iter =
+        std::find_if(Impl->begin(), Impl->end(),
+                     [&](const auto &Item) { return Item.Effect == Key; });
+    return Iter != Impl->end() ? &*Iter : nullptr;
+  }
+
+  size_t size() const { return Impl ? Impl->size() : 0; }
+};
+
+// ----------
+// State pertaining to a function whose AST is walked and whose effect analysis
+// is dependent on a subsequent analysis of other functions.
+class PendingFunctionAnalysis {
+  friend class CompleteFunctionAnalysis;
+
+public:
+  struct DirectCall {
+    const Decl *Callee;
+    SourceLocation CallLoc;
+    // Not all recursive calls are detected, just enough
+    // to break cycles.
+    bool Recursed = false;
+    ViolationSite VSite;
+
+    DirectCall(const Decl *D, SourceLocation CallLoc, ViolationSite VSite)
+        : Callee(D), CallLoc(CallLoc), VSite(VSite) {}
+  };
+
+  // We always have two disjoint sets of effects to verify:
+  // 1. Effects declared explicitly by this function.
+  // 2. All other inferrable effects needing verification.
+  FunctionEffectKindSet DeclaredVerifiableEffects;
+  FunctionEffectKindSet EffectsToInfer;
+
+private:
+  // Violations pertaining to the function's explicit effects.
+  SmallVector<Violation, 0> ViolationsForExplicitEffects;
+
+  // Violations pertaining to other, non-explicit, inferrable effects.
+  EffectToViolationMap InferrableEffectToFirstViolation;
+
+  // These unverified direct calls are what keeps the analysis "pending",
+  // until the callees can be verified.
+  SmallVector<DirectCall, 0> UnverifiedDirectCalls;
+
+public:
+  PendingFunctionAnalysis(Sema &S, const CallableInfo &CInfo,
+                          FunctionEffectKindSet AllInferrableEffectsToVerify)
+      : DeclaredVerifiableEffects(CInfo.Effects) {
+    // Check for effects we are not allowed to infer.
+    FunctionEffectKindSet InferrableEffects;
+
+    for (FunctionEffect effect : AllInferrableEffectsToVerify) {
+      std::optional<FunctionEffect> ProblemCalleeEffect =
+          effect.effectProhibitingInference(*CInfo.CDecl, CInfo.Effects);
+      if (!ProblemCalleeEffect)
+        InferrableEffects.insert(effect);
+      else {
+        // Add a Violation for this effect if a caller were to
+        // try to infer it.
+        InferrableEffectToFirstViolation.maybeInsert(Violation(
+            effect, ViolationID::DeclDisallowsInference, ViolationSite{},
+            CInfo.CDecl->getLocation(), nullptr, ProblemCalleeEffect));
+      }
+    }
+    // InferrableEffects is now the set of inferrable effects which are not
+    // prohibited.
+    EffectsToInfer = FunctionEffectKindSet::difference(
+        InferrableEffects, DeclaredVerifiableEffects);
+  }
+
+  // Hide the way that Violations for explicitly required effects vs. inferred
+  // ones are handled differently.
+  void checkAddViolation(bool Inferring, const Violation &NewViol) {
+    if (!Inferring)
+      ViolationsForExplicitEffects.push_back(NewViol);
+    else
+      InferrableEffectToFirstViolation.maybeInsert(NewViol);
+  }
+
+  void addUnverifiedDirectCall(const Decl *D, SourceLocation CallLoc,
+                               ViolationSite VSite) {
+    UnverifiedDirectCalls.emplace_back(D, CallLoc, VSite);
+  }
+
+  // Analysis is complete when there are no unverified direct calls.
+  bool isComplete() const { return UnverifiedDirectCalls.empty(); }
+
+  const Violation *violationForInferrableEffect(FunctionEffect effect) {
+    return InferrableEffectToFirstViolation.lookup(effect);
+  }
+
+  // Mutable because caller may need to set a DirectCall's Recursing flag.
+  MutableArrayRef<DirectCall> unverifiedCalls() {
+    assert(!isComplete());
+    return UnverifiedDirectCalls;
+  }
+
+  ArrayRef<Violation> getSortedViolationsForExplicitEffects(SourceManager &SM) {
+    if (!ViolationsForExplicitEffects.empty())
+      std::sort(ViolationsForExplicitEffects.begin(),
+                ViolationsForExplicitEffects.end(),
+                [&SM](const Violation &LHS, const Violation &RHS) {
+                  return SM.isBeforeInTranslationUnit(LHS.Loc, RHS.Loc);
+                });
+    return ViolationsForExplicitEffects;
+  }
+
+  void dump(Sema &SemaRef, llvm::raw_ostream &OS) const {
+    OS << "Pending: Declared ";
+    DeclaredVerifiableEffects.dump(OS);
+    OS << ", " << ViolationsForExplicitEffects.size() << " violations; ";
+    OS << " Infer ";
+    EffectsToInfer.dump(OS);
+    OS << ", " << InferrableEffectToFirstViolation.size() << " violations";
+    if (!UnverifiedDirectCalls.empty()) {
+      OS << "; Calls: ";
+      for (const DirectCall &Call : UnverifiedDirectCalls) {
+        CallableInfo CI(*Call.Callee);
+        OS << " " << CI.name(SemaRef);
+      }
+    }
+    OS << "\n";
+  }
+};
+
+// ----------
+class CompleteFunctionAnalysis {
+  // Current size: 2 pointers
+public:
+  // Has effects which are both the declared ones -- not to be inferred -- plus
+  // ones which have been successfully inferred. These are all considered
+  // "verified" for the purposes of callers; any issue with verifying declared
+  // effects has already been reported and is not the problem of any caller.
+  FunctionEffectKindSet VerifiedEffects;
+
+private:
+  // This is used to generate notes about failed inference.
+  EffectToViolationMap InferrableEffectToFirstViolation;
+
+public:
+  // The incoming Pending analysis is consumed (member(s) are moved-from).
+  CompleteFunctionAnalysis(ASTContext &Ctx, PendingFunctionAnalysis &&Pending,
+                           FunctionEffectKindSet DeclaredEffects,
+                           FunctionEffectKindSet AllInferrableEffectsToVerify)
+      : VerifiedEffects(DeclaredEffects) {
+    for (FunctionEffect effect : AllInferrableEffectsToVerify)
+      if (Pending.violationForInferrableEffect(effect) == nullptr)
+        VerifiedEffects.insert(effect);
+
+    InferrableEffectToFirstViolation =
+        std::move(Pending.InferrableEffectToFirstViolation);
+  }
+
+  const Violation *firstViolationForEffect(FunctionEffect Effect) {
+    return InferrableEffectToFirstViolation.lookup(Effect);
+  }
+
+  void dump(llvm::raw_ostream &OS) const {
+    OS << "Complete: Verified ";
+    VerifiedEffects.dump(OS);
+    OS << "; Infer ";
+    OS << InferrableEffectToFirstViolation.size() << " violations\n";
+  }
+};
+
+// ==========
+class Analyzer {
+  Sema &S;
+
+  // Subset of Sema.AllEffectsToVerify
+  FunctionEffectKindSet AllInferrableEffectsToVerify;
+
+  using FuncAnalysisPtr =
+      llvm::PointerUnion<PendingFunctionAnalysis *, CompleteFunctionAnalysis *>;
+
+  // Map all Decls analyzed to FuncAnalysisPtr. Pending state is larger
+  // than complete state, so use different objects to represent them.
+  // The state pointers are owned by the container.
+  class AnalysisMap : llvm::DenseMap<const Decl *, FuncAnalysisPtr> {
+    using Base = llvm::DenseMap<const Decl *, FuncAnalysisPtr>;
+
+  public:
+    ~AnalysisMap();
+
+    // Use non-public inheritance in order to maintain the invariant
+    // that lookups and insertions are via the canonical Decls.
+
+    FuncAnalysisPtr lookup(const Decl *Key) const {
+      return Base::lookup(Key->getCanonicalDecl());
+    }
+
+    FuncAnalysisPtr &operator[](const Decl *Key) {
+      return Base::operator[](Key->getCanonicalDecl());
+    }
+
+    /// Shortcut for the case where we only care about completed analysis.
+    CompleteFunctionAnalysis *completedAnalysisForDecl(const Decl *D) const {
+      if (FuncAnalysisPtr AP = lookup(D);
+          isa_and_nonnull<CompleteFunctionAnalysis *>(AP))
+        return AP.get<CompleteFunctionAnalysis *>();
+      return nullptr;
+    }
+
+    void dump(Sema &SemaRef, llvm::raw_ostream &OS) {
+      OS << "\nAnalysisMap:\n";
+      for (const auto &item : *this) {
+        CallableInfo CI(*item.first);
+        const auto AP = item.second;
+        OS << item.first << " " << CI.name(SemaRef) << " : ";
+        if (AP.isNull())
+          OS << "null\n";
+        else if (isa<CompleteFunctionAnalysis *>(AP)) {
----------------
dougsonos wrote:

Thanks. For many years I was into minimal braces, then went braces-everywhere, and still haven't quite adapted here...

https://github.com/llvm/llvm-project/pull/99656