[clang] nonblocking/nonallocating attributes: 2nd pass caller/callee analysis (PR #99656)

Fri Jul 19 07:57:35 PDT 2024

================
@@ -2397,6 +2397,1262 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
 };
 } // namespace
 
+// =============================================================================
+
+namespace FXAnalysis {
+
+enum class DiagnosticID : uint8_t {
+  None = 0, // sentinel for an empty Diagnostic
+  Throws,
+  Catches,
+  CallsObjC,
+  AllocatesMemory,
+  HasStaticLocal,
+  AccessesThreadLocal,
+
+  // These only apply to callees, where the analysis stops at the Decl
+  DeclDisallowsInference,
+
+  CallsDeclWithoutEffect,
+  CallsExprWithoutEffect,
+};
+
+// Holds an effect diagnosis, potentially for the entire duration of the
+// analysis phase, in order to refer to it when explaining why a caller has been
+// made unsafe by a callee.
+struct Diagnostic {
+  FunctionEffect Effect;
+  DiagnosticID ID = DiagnosticID::None;
+  SourceLocation Loc;
+  const Decl *Callee = nullptr; // only valid for Calls*
+
+  Diagnostic() = default;
+
+  Diagnostic(const FunctionEffect &Effect, DiagnosticID ID, SourceLocation Loc,
+             const Decl *Callee = nullptr)
+      : Effect(Effect), ID(ID), Loc(Loc), Callee(Callee) {}
+};
+
+enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete };
+enum class CallType {
+  // unknown: probably function pointer
+  Unknown,
+  Function,
+  Virtual,
+  Block
+};
+
+// Return whether a function's effects CAN be verified.
+// The question of whether it SHOULD be verified is independent.
+static bool functionIsVerifiable(const FunctionDecl *FD) {
+  if (!(FD->hasBody() || FD->isInlined())) {
+    // externally defined; we couldn't verify if we wanted to.
+    return false;
+  }
+  if (FD->isTrivial()) {
+    // Otherwise `struct x { int a; };` would have an unverifiable default
+    // constructor.
+    return true;
+  }
+  return true;
+}
+
+/// A mutable set of FunctionEffect, for use in places where any conditions
+/// have been resolved or can be ignored.
+class EffectSet {
+  // This implementation optimizes footprint, since we hold one of these for
+  // every function visited, which, due to inference, can be many more functions
+  // than have declared effects.
+
+  template <typename T, typename SizeT, SizeT Capacity> struct FixedVector {
+    SizeT Count = 0;
+    T Items[Capacity] = {};
+
+    using value_type = T;
+
+    using iterator = T *;
+    using const_iterator = const T *;
+    iterator begin() { return &Items[0]; }
+    iterator end() { return &Items[Count]; }
+    const_iterator begin() const { return &Items[0]; }
+    const_iterator end() const { return &Items[Count]; }
+    const_iterator cbegin() const { return &Items[0]; }
+    const_iterator cend() const { return &Items[Count]; }
+
+    void insert(iterator I, const T &Value) {
+      assert(Count < Capacity);
+      iterator E = end();
+      if (I != E)
+        std::copy_backward(I, E, E + 1);
+      *I = Value;
+      ++Count;
+    }
+
+    void push_back(const T &Value) {
+      assert(Count < Capacity);
+      Items[Count++] = Value;
+    }
+  };
+
+  // As long as FunctionEffect is only 1 byte, and there are only 2 verifiable
+  // effects, this fixed-size vector with a capacity of 7 is more than
+  // sufficient and is only 8 bytes.
+  FixedVector<FunctionEffect, uint8_t, 7> Impl;
+
+public:
+  EffectSet() = default;
+  explicit EffectSet(FunctionEffectsRef FX) { insert(FX); }
+
+  operator ArrayRef<FunctionEffect>() const {
+    return ArrayRef(Impl.cbegin(), Impl.cend());
+  }
+
+  using iterator = const FunctionEffect *;
+  iterator begin() const { return Impl.cbegin(); }
+  iterator end() const { return Impl.cend(); }
+
+  void insert(const FunctionEffect &Effect) {
+    FunctionEffect *Iter = Impl.begin();
+    FunctionEffect *End = Impl.end();
+    // linear search; lower_bound is overkill for a tiny vector like this
+    for (; Iter != End; ++Iter) {
+      if (*Iter == Effect)
+        return;
+      if (Effect < *Iter)
+        break;
+    }
+    Impl.insert(Iter, Effect);
+  }
+  void insert(const EffectSet &Set) {
+    for (const FunctionEffect &Item : Set) {
+      // push_back because set is already sorted
+      Impl.push_back(Item);
+    }
+  }
+  void insert(FunctionEffectsRef FX) {
+    for (const FunctionEffectWithCondition &EC : FX) {
+      assert(EC.Cond.getCondition() ==
+             nullptr); // should be resolved by now, right?
+      // push_back because set is already sorted
+      Impl.push_back(EC.Effect);
+    }
+  }
+  bool contains(const FunctionEffect::Kind EK) const {
+    for (const FunctionEffect &E : Impl)
+      if (E.kind() == EK)
+        return true;
+    return false;
+  }
+
+  void dump(llvm::raw_ostream &OS) const;
+
+  static EffectSet difference(ArrayRef<FunctionEffect> LHS,
+                              ArrayRef<FunctionEffect> RHS) {
+    EffectSet Result;
+    std::set_difference(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+                        std::back_inserter(Result.Impl));
+    return Result;
+  }
+};
+
+LLVM_DUMP_METHOD void EffectSet::dump(llvm::raw_ostream &OS) const {
+  OS << "Effects{";
+  bool First = true;
+  for (const FunctionEffect &Effect : *this) {
+    if (!First)
+      OS << ", ";
+    else
+      First = false;
+    OS << Effect.name();
+  }
+  OS << "}";
+}
+
+// Transitory, more extended information about a callable, which can be a
+// function, block, function pointer, etc.
+struct CallableInfo {
+  // CDecl holds the function's definition, if any.
+  // FunctionDecl if CallType::Function or Virtual
+  // BlockDecl if CallType::Block
+  const Decl *CDecl;
+  mutable std::optional<std::string> MaybeName;
+  SpecialFuncType FuncType = SpecialFuncType::None;
+  EffectSet Effects;
+  CallType CType = CallType::Unknown;
+
+  CallableInfo(Sema &SemaRef, const Decl &CD,
+               SpecialFuncType FT = SpecialFuncType::None)
+      : CDecl(&CD), FuncType(FT) {
+    FunctionEffectsRef FXRef;
+
+    if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) {
+      // Use the function's definition, if any.
+      if (const FunctionDecl *Def = FD->getDefinition())
+        CDecl = FD = Def;
+      CType = CallType::Function;
+      if (auto *Method = dyn_cast<CXXMethodDecl>(FD);
+          Method && Method->isVirtual())
+        CType = CallType::Virtual;
+      FXRef = FD->getFunctionEffects();
+    } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) {
+      CType = CallType::Block;
+      FXRef = BD->getFunctionEffects();
+    } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) {
+      // ValueDecl is function, enum, or variable, so just look at its type.
+      FXRef = FunctionEffectsRef::get(VD->getType());
+    }
+    Effects = EffectSet(FXRef);
+  }
+
+  bool isDirectCall() const {
+    return CType == CallType::Function || CType == CallType::Block;
+  }
+
+  bool isVerifiable() const {
+    switch (CType) {
+    case CallType::Unknown:
+    case CallType::Virtual:
+      break;
+    case CallType::Block:
+      return true;
+    case CallType::Function:
+      return functionIsVerifiable(dyn_cast<FunctionDecl>(CDecl));
+    }
+    return false;
+  }
+
+  /// Generate a name for logging and diagnostics.
+  std::string name(Sema &Sem) const {
+    if (!MaybeName) {
+      std::string Name;
+      llvm::raw_string_ostream OS(Name);
+
+      if (auto *FD = dyn_cast<FunctionDecl>(CDecl))
+        FD->getNameForDiagnostic(OS, Sem.getPrintingPolicy(),
+                                 /*Qualified=*/true);
+      else if (auto *BD = dyn_cast<BlockDecl>(CDecl))
+        OS << "(block " << BD->getBlockManglingNumber() << ")";
+      else if (auto *VD = dyn_cast<NamedDecl>(CDecl))
+        VD->printQualifiedName(OS);
+      MaybeName = Name;
+    }
+    return *MaybeName;
+  }
+};
+
+// ----------
+// Map effects to single diagnostics, to hold the first (of potentially many)
+// diagnostics pertaining to an effect, per function.
+class EffectToDiagnosticMap {
+  // Since we currently only have a tiny number of effects (typically no more
+  // than 1), use a sorted SmallVector with an inline capacity of 1. Since it
+  // is often empty, use a unique_ptr to the SmallVector.
+  // Note that Diagnostic itself contains a FunctionEffect which is the key.
+  using ImplVec = llvm::SmallVector<Diagnostic, 1>;
+  std::unique_ptr<ImplVec> Impl;
+
+public:
+  // Insert a new diagnostic if we do not already have one for its effect.
+  void maybeInsert(const Diagnostic &Diag) {
+    if (Impl == nullptr)
+      Impl = std::make_unique<ImplVec>();
+    auto *Iter = _find(Diag.Effect);
+    if (Iter != Impl->end() && Iter->Effect == Diag.Effect)
+      return;
+
+    Impl->insert(Iter, Diag);
+  }
+
+  const Diagnostic *lookup(FunctionEffect Key) {
+    if (Impl == nullptr)
+      return nullptr;
+
+    auto *Iter = _find(Key);
+    if (Iter != Impl->end() && Iter->Effect == Key)
+      return &*Iter;
+
+    return nullptr;
+  }
+
+  size_t size() const { return Impl ? Impl->size() : 0; }
+
+private:
+  ImplVec::iterator _find(const FunctionEffect &key) {
+    // A linear search suffices for a tiny number of possible effects.
+    auto *End = Impl->end();
+    for (auto *Iter = Impl->begin(); Iter != End; ++Iter)
+      if (!(Iter->Effect < key))
+        return Iter;
+    return End;
+  }
+};
+
+// ----------
+// State pertaining to a function whose AST is walked and whose effect analysis
+// is dependent on a subsequent analysis of other functions.
+class PendingFunctionAnalysis {
+  friend class CompleteFunctionAnalysis;
+
+public:
+  struct DirectCall {
+    const Decl *Callee;
+    SourceLocation CallLoc;
+    // Not all recursive calls are detected, just enough
+    // to break cycles.
+    bool Recursed = false;
+
+    DirectCall(const Decl *D, SourceLocation CallLoc)
+        : Callee(D), CallLoc(CallLoc) {}
+  };
+
+  // We always have two disjoint sets of effects to verify:
+  // 1. Effects declared explicitly by this function.
+  // 2. All other inferrable effects needing verification.
+  EffectSet DeclaredVerifiableEffects;
+  EffectSet FXToInfer;
+
+private:
+  // Diagnostics pertaining to the function's explicit effects.
+  SmallVector<Diagnostic, 0> DiagnosticsForExplicitFX;
+
+  // Diagnostics pertaining to other, non-explicit, inferrable effects.
+  EffectToDiagnosticMap InferrableEffectToFirstDiagnostic;
+
+  // These unverified direct calls are what keeps the analysis "pending",
+  // until the callees can be verified.
+  SmallVector<DirectCall, 0> UnverifiedDirectCalls;
+
+public:
+  PendingFunctionAnalysis(
+      Sema &Sem, const CallableInfo &CInfo,
+      ArrayRef<FunctionEffect> AllInferrableEffectsToVerify) {
+    DeclaredVerifiableEffects = CInfo.Effects;
+
+    // Check for effects we are not allowed to infer
+    EffectSet InferrableFX;
+
+    for (const FunctionEffect &effect : AllInferrableEffectsToVerify) {
+      if (effect.canInferOnFunction(*CInfo.CDecl))
+        InferrableFX.insert(effect);
+      else {
+        // Add a diagnostic for this effect if a caller were to
+        // try to infer it.
+        InferrableEffectToFirstDiagnostic.maybeInsert(
+            Diagnostic(effect, DiagnosticID::DeclDisallowsInference,
+                       CInfo.CDecl->getLocation()));
+      }
+    }
+    // InferrableFX is now the set of inferrable effects which are not
+    // prohibited
+    FXToInfer = EffectSet::difference(InferrableFX, DeclaredVerifiableEffects);
+  }
+
+  // Hide the way that diagnostics for explicitly required effects vs. inferred
+  // ones are handled differently.
+  void checkAddDiagnostic(bool Inferring, const Diagnostic &NewDiag) {
+    if (!Inferring)
+      DiagnosticsForExplicitFX.push_back(NewDiag);
+    else
+      InferrableEffectToFirstDiagnostic.maybeInsert(NewDiag);
+  }
+
+  void addUnverifiedDirectCall(const Decl *D, SourceLocation CallLoc) {
+    UnverifiedDirectCalls.emplace_back(D, CallLoc);
+  }
+
+  // Analysis is complete when there are no unverified direct calls.
+  bool isComplete() const { return UnverifiedDirectCalls.empty(); }
+
+  const Diagnostic *diagnosticForInferrableEffect(FunctionEffect effect) {
+    return InferrableEffectToFirstDiagnostic.lookup(effect);
+  }
+
+  SmallVector<DirectCall, 0> &unverifiedCalls() {
+    assert(!isComplete());
+    return UnverifiedDirectCalls;
+  }
+
+  SmallVector<Diagnostic, 0> &getDiagnosticsForExplicitFX() {
+    return DiagnosticsForExplicitFX;
+  }
+
+  void dump(Sema &SemaRef, llvm::raw_ostream &OS) const {
+    OS << "Pending: Declared ";
+    DeclaredVerifiableEffects.dump(OS);
+    OS << ", " << DiagnosticsForExplicitFX.size() << " diags; ";
+    OS << " Infer ";
+    FXToInfer.dump(OS);
+    OS << ", " << InferrableEffectToFirstDiagnostic.size() << " diags";
+    if (!UnverifiedDirectCalls.empty()) {
+      OS << "; Calls: ";
+      for (const DirectCall &Call : UnverifiedDirectCalls) {
+        CallableInfo CI(SemaRef, *Call.Callee);
+        OS << " " << CI.name(SemaRef);
+      }
+    }
+    OS << "\n";
+  }
+};
+
+// ----------
+class CompleteFunctionAnalysis {
+  // Current size: 2 pointers
+public:
+  // Has effects which are both the declared ones -- not to be inferred -- plus
+  // ones which have been successfully inferred. These are all considered
+  // "verified" for the purposes of callers; any issue with verifying declared
+  // effects has already been reported and is not the problem of any caller.
+  EffectSet VerifiedEffects;
+
+private:
+  // This is used to generate notes about failed inference.
+  EffectToDiagnosticMap InferrableEffectToFirstDiagnostic;
+
+public:
+  // The incoming Pending analysis is consumed (member(s) are moved-from).
+  CompleteFunctionAnalysis(
+      ASTContext &Ctx, PendingFunctionAnalysis &Pending,
+      const EffectSet &DeclaredEffects,
+      ArrayRef<FunctionEffect> AllInferrableEffectsToVerify) {
+    VerifiedEffects.insert(DeclaredEffects);
+    for (const FunctionEffect &effect : AllInferrableEffectsToVerify)
+      if (Pending.diagnosticForInferrableEffect(effect) == nullptr)
+        VerifiedEffects.insert(effect);
+
+    InferrableEffectToFirstDiagnostic =
+        std::move(Pending.InferrableEffectToFirstDiagnostic);
+  }
+
+  const Diagnostic *firstDiagnosticForEffect(const FunctionEffect &Effect) {
+    return InferrableEffectToFirstDiagnostic.lookup(Effect);
+  }
+
+  void dump(llvm::raw_ostream &OS) const {
+    OS << "Complete: Verified ";
+    VerifiedEffects.dump(OS);
+    OS << "; Infer ";
+    OS << InferrableEffectToFirstDiagnostic.size() << " diags\n";
+  }
+};
+
+const Decl *CanonicalFunctionDecl(const Decl *D) {
+  if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+    FD = FD->getCanonicalDecl();
+    assert(FD != nullptr);
+    return FD;
+  }
+  return D;
+}
+
+// ==========
+class Analyzer {
+  constexpr static int DebugLogLevel = 0;
----------------
dougsonos wrote:

Controls whether some debug logging is enabled. I don't see precedent for this kind of debug logging and that may well argue for removing it; it was however useful during development and debugging.

https://github.com/llvm/llvm-project/pull/99656