[clang] nonblocking/nonallocating attributes: 2nd pass caller/callee analysis (PR #99656)
Doug Wyatt via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 7 08:27:49 PDT 2024
================
@@ -2397,6 +2397,1262 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
};
} // namespace
+// =============================================================================
+
+namespace FXAnalysis {
+
+enum class DiagnosticID : uint8_t {
+ None = 0, // sentinel for an empty Diagnostic
+ Throws,
+ Catches,
+ CallsObjC,
+ AllocatesMemory,
+ HasStaticLocal,
+ AccessesThreadLocal,
+
+ // These only apply to callees, where the analysis stops at the Decl
+ DeclDisallowsInference,
+
+ CallsDeclWithoutEffect,
+ CallsExprWithoutEffect,
+};
+
+// Holds an effect diagnosis, potentially for the entire duration of the
+// analysis phase, in order to refer to it when explaining why a caller has been
+// made unsafe by a callee.
+struct Diagnostic {
+ FunctionEffect Effect;
+ DiagnosticID ID = DiagnosticID::None;
+ SourceLocation Loc;
+ const Decl *Callee = nullptr; // only valid for Calls*
+
+ Diagnostic() = default;
+
+ Diagnostic(const FunctionEffect &Effect, DiagnosticID ID, SourceLocation Loc,
+ const Decl *Callee = nullptr)
+ : Effect(Effect), ID(ID), Loc(Loc), Callee(Callee) {}
+};
+
+enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete };
+enum class CallType {
+ // unknown: probably function pointer
+ Unknown,
+ Function,
+ Virtual,
+ Block
+};
+
+// Return whether a function's effects CAN be verified.
+// The question of whether it SHOULD be verified is independent.
+static bool functionIsVerifiable(const FunctionDecl *FD) {
+ if (!(FD->hasBody() || FD->isInlined())) {
+ // externally defined; we couldn't verify if we wanted to.
+ return false;
+ }
+ if (FD->isTrivial()) {
+ // Otherwise `struct x { int a; };` would have an unverifiable default
+ // constructor.
+ return true;
+ }
+ return true;
+}
+
+/// A mutable set of FunctionEffect, for use in places where any conditions
+/// have been resolved or can be ignored.
+class EffectSet {
+ // This implementation optimizes footprint, since we hold one of these for
+ // every function visited, which, due to inference, can be many more functions
+ // than have declared effects.
+
+ template <typename T, typename SizeT, SizeT Capacity> struct FixedVector {
+ SizeT Count = 0;
+ T Items[Capacity] = {};
+
+ using value_type = T;
+
+ using iterator = T *;
+ using const_iterator = const T *;
+ iterator begin() { return &Items[0]; }
+ iterator end() { return &Items[Count]; }
+ const_iterator begin() const { return &Items[0]; }
+ const_iterator end() const { return &Items[Count]; }
+ const_iterator cbegin() const { return &Items[0]; }
+ const_iterator cend() const { return &Items[Count]; }
+
+ void insert(iterator I, const T &Value) {
+ assert(Count < Capacity);
+ iterator E = end();
+ if (I != E)
+ std::copy_backward(I, E, E + 1);
+ *I = Value;
+ ++Count;
+ }
+
+ void push_back(const T &Value) {
+ assert(Count < Capacity);
+ Items[Count++] = Value;
+ }
+ };
+
+ // As long as FunctionEffect is only 1 byte, and there are only 2 verifiable
+ // effects, this fixed-size vector with a capacity of 7 is more than
+ // sufficient and is only 8 bytes.
+ FixedVector<FunctionEffect, uint8_t, 7> Impl;
+
+public:
+ EffectSet() = default;
+ explicit EffectSet(FunctionEffectsRef FX) { insert(FX); }
+
+ operator ArrayRef<FunctionEffect>() const {
+ return ArrayRef(Impl.cbegin(), Impl.cend());
+ }
+
+ using iterator = const FunctionEffect *;
+ iterator begin() const { return Impl.cbegin(); }
+ iterator end() const { return Impl.cend(); }
+
+ void insert(const FunctionEffect &Effect) {
+ FunctionEffect *Iter = Impl.begin();
+ FunctionEffect *End = Impl.end();
+ // linear search; lower_bound is overkill for a tiny vector like this
+ for (; Iter != End; ++Iter) {
+ if (*Iter == Effect)
+ return;
+ if (Effect < *Iter)
+ break;
+ }
+ Impl.insert(Iter, Effect);
+ }
+ void insert(const EffectSet &Set) {
+ for (const FunctionEffect &Item : Set) {
+ // push_back because set is already sorted
+ Impl.push_back(Item);
+ }
+ }
+ void insert(FunctionEffectsRef FX) {
+ for (const FunctionEffectWithCondition &EC : FX) {
+ assert(EC.Cond.getCondition() ==
+ nullptr); // should be resolved by now, right?
+ // push_back because set is already sorted
+ Impl.push_back(EC.Effect);
+ }
+ }
+ bool contains(const FunctionEffect::Kind EK) const {
+ for (const FunctionEffect &E : Impl)
+ if (E.kind() == EK)
+ return true;
+ return false;
+ }
+
+ void dump(llvm::raw_ostream &OS) const;
+
+ static EffectSet difference(ArrayRef<FunctionEffect> LHS,
+ ArrayRef<FunctionEffect> RHS) {
+ EffectSet Result;
+ std::set_difference(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+ std::back_inserter(Result.Impl));
+ return Result;
+ }
+};
+
+LLVM_DUMP_METHOD void EffectSet::dump(llvm::raw_ostream &OS) const {
+ OS << "Effects{";
+ bool First = true;
+ for (const FunctionEffect &Effect : *this) {
+ if (!First)
+ OS << ", ";
+ else
+ First = false;
+ OS << Effect.name();
+ }
+ OS << "}";
+}
+
+// Transitory, more extended information about a callable, which can be a
+// function, block, function pointer, etc.
+struct CallableInfo {
+ // CDecl holds the function's definition, if any.
+ // FunctionDecl if CallType::Function or Virtual
+ // BlockDecl if CallType::Block
+ const Decl *CDecl;
+ mutable std::optional<std::string> MaybeName;
+ SpecialFuncType FuncType = SpecialFuncType::None;
+ EffectSet Effects;
+ CallType CType = CallType::Unknown;
+
+ CallableInfo(Sema &SemaRef, const Decl &CD,
+ SpecialFuncType FT = SpecialFuncType::None)
+ : CDecl(&CD), FuncType(FT) {
+ FunctionEffectsRef FXRef;
+
+ if (auto *FD = dyn_cast<FunctionDecl>(CDecl)) {
+ // Use the function's definition, if any.
+ if (const FunctionDecl *Def = FD->getDefinition())
+ CDecl = FD = Def;
+ CType = CallType::Function;
+ if (auto *Method = dyn_cast<CXXMethodDecl>(FD);
+ Method && Method->isVirtual())
+ CType = CallType::Virtual;
+ FXRef = FD->getFunctionEffects();
+ } else if (auto *BD = dyn_cast<BlockDecl>(CDecl)) {
+ CType = CallType::Block;
+ FXRef = BD->getFunctionEffects();
+ } else if (auto *VD = dyn_cast<ValueDecl>(CDecl)) {
+ // ValueDecl is function, enum, or variable, so just look at its type.
+ FXRef = FunctionEffectsRef::get(VD->getType());
+ }
+ Effects = EffectSet(FXRef);
+ }
+
+ bool isDirectCall() const {
+ return CType == CallType::Function || CType == CallType::Block;
+ }
+
+ bool isVerifiable() const {
+ switch (CType) {
+ case CallType::Unknown:
+ case CallType::Virtual:
+ break;
+ case CallType::Block:
+ return true;
+ case CallType::Function:
+ return functionIsVerifiable(dyn_cast<FunctionDecl>(CDecl));
+ }
+ return false;
+ }
+
+ /// Generate a name for logging and diagnostics.
+ std::string name(Sema &Sem) const {
+ if (!MaybeName) {
+ std::string Name;
+ llvm::raw_string_ostream OS(Name);
+
+ if (auto *FD = dyn_cast<FunctionDecl>(CDecl))
+ FD->getNameForDiagnostic(OS, Sem.getPrintingPolicy(),
+ /*Qualified=*/true);
+ else if (auto *BD = dyn_cast<BlockDecl>(CDecl))
+ OS << "(block " << BD->getBlockManglingNumber() << ")";
+ else if (auto *VD = dyn_cast<NamedDecl>(CDecl))
+ VD->printQualifiedName(OS);
+ MaybeName = Name;
+ }
+ return *MaybeName;
+ }
+};
+
+// ----------
+// Map effects to single diagnostics, to hold the first (of potentially many)
+// diagnostics pertaining to an effect, per function.
+class EffectToDiagnosticMap {
+ // Since we currently only have a tiny number of effects (typically no more
+ // than 1), use a sorted SmallVector with an inline capacity of 1. Since it
+ // is often empty, use a unique_ptr to the SmallVector.
+ // Note that Diagnostic itself contains a FunctionEffect which is the key.
+ using ImplVec = llvm::SmallVector<Diagnostic, 1>;
+ std::unique_ptr<ImplVec> Impl;
+
+public:
+ // Insert a new diagnostic if we do not already have one for its effect.
+ void maybeInsert(const Diagnostic &Diag) {
+ if (Impl == nullptr)
+ Impl = std::make_unique<ImplVec>();
+ auto *Iter = _find(Diag.Effect);
+ if (Iter != Impl->end() && Iter->Effect == Diag.Effect)
+ return;
+
+ Impl->insert(Iter, Diag);
+ }
+
+ const Diagnostic *lookup(FunctionEffect Key) {
+ if (Impl == nullptr)
+ return nullptr;
+
+ auto *Iter = _find(Key);
+ if (Iter != Impl->end() && Iter->Effect == Key)
+ return &*Iter;
+
+ return nullptr;
+ }
+
+ size_t size() const { return Impl ? Impl->size() : 0; }
+
+private:
+ ImplVec::iterator _find(const FunctionEffect &key) {
+ // A linear search suffices for a tiny number of possible effects.
+ auto *End = Impl->end();
+ for (auto *Iter = Impl->begin(); Iter != End; ++Iter)
+ if (!(Iter->Effect < key))
+ return Iter;
+ return End;
+ }
+};
+
+// ----------
+// State pertaining to a function whose AST is walked and whose effect analysis
+// is dependent on a subsequent analysis of other functions.
+class PendingFunctionAnalysis {
+ friend class CompleteFunctionAnalysis;
+
+public:
+ struct DirectCall {
+ const Decl *Callee;
+ SourceLocation CallLoc;
+ // Not all recursive calls are detected, just enough
+ // to break cycles.
+ bool Recursed = false;
+
+ DirectCall(const Decl *D, SourceLocation CallLoc)
+ : Callee(D), CallLoc(CallLoc) {}
+ };
+
+ // We always have two disjoint sets of effects to verify:
+ // 1. Effects declared explicitly by this function.
+ // 2. All other inferrable effects needing verification.
+ EffectSet DeclaredVerifiableEffects;
+ EffectSet FXToInfer;
+
+private:
+ // Diagnostics pertaining to the function's explicit effects.
+ SmallVector<Diagnostic, 0> DiagnosticsForExplicitFX;
+
+ // Diagnostics pertaining to other, non-explicit, inferrable effects.
+ EffectToDiagnosticMap InferrableEffectToFirstDiagnostic;
+
+ // These unverified direct calls are what keeps the analysis "pending",
+ // until the callees can be verified.
+ SmallVector<DirectCall, 0> UnverifiedDirectCalls;
+
+public:
+ PendingFunctionAnalysis(
+ Sema &Sem, const CallableInfo &CInfo,
+ ArrayRef<FunctionEffect> AllInferrableEffectsToVerify) {
+ DeclaredVerifiableEffects = CInfo.Effects;
+
+ // Check for effects we are not allowed to infer
+ EffectSet InferrableFX;
+
+ for (const FunctionEffect &effect : AllInferrableEffectsToVerify) {
+ if (effect.canInferOnFunction(*CInfo.CDecl))
+ InferrableFX.insert(effect);
+ else {
+ // Add a diagnostic for this effect if a caller were to
+ // try to infer it.
+ InferrableEffectToFirstDiagnostic.maybeInsert(
+ Diagnostic(effect, DiagnosticID::DeclDisallowsInference,
+ CInfo.CDecl->getLocation()));
+ }
+ }
+ // InferrableFX is now the set of inferrable effects which are not
+ // prohibited
+ FXToInfer = EffectSet::difference(InferrableFX, DeclaredVerifiableEffects);
+ }
+
+ // Hide the way that diagnostics for explicitly required effects vs. inferred
+ // ones are handled differently.
+ void checkAddDiagnostic(bool Inferring, const Diagnostic &NewDiag) {
+ if (!Inferring)
+ DiagnosticsForExplicitFX.push_back(NewDiag);
+ else
+ InferrableEffectToFirstDiagnostic.maybeInsert(NewDiag);
+ }
+
+ void addUnverifiedDirectCall(const Decl *D, SourceLocation CallLoc) {
+ UnverifiedDirectCalls.emplace_back(D, CallLoc);
+ }
+
+ // Analysis is complete when there are no unverified direct calls.
+ bool isComplete() const { return UnverifiedDirectCalls.empty(); }
+
+ const Diagnostic *diagnosticForInferrableEffect(FunctionEffect effect) {
+ return InferrableEffectToFirstDiagnostic.lookup(effect);
+ }
+
+ SmallVector<DirectCall, 0> &unverifiedCalls() {
+ assert(!isComplete());
+ return UnverifiedDirectCalls;
+ }
+
+ SmallVector<Diagnostic, 0> &getDiagnosticsForExplicitFX() {
+ return DiagnosticsForExplicitFX;
+ }
+
+ void dump(Sema &SemaRef, llvm::raw_ostream &OS) const {
+ OS << "Pending: Declared ";
+ DeclaredVerifiableEffects.dump(OS);
+ OS << ", " << DiagnosticsForExplicitFX.size() << " diags; ";
+ OS << " Infer ";
+ FXToInfer.dump(OS);
+ OS << ", " << InferrableEffectToFirstDiagnostic.size() << " diags";
+ if (!UnverifiedDirectCalls.empty()) {
+ OS << "; Calls: ";
+ for (const DirectCall &Call : UnverifiedDirectCalls) {
+ CallableInfo CI(SemaRef, *Call.Callee);
+ OS << " " << CI.name(SemaRef);
+ }
+ }
+ OS << "\n";
+ }
+};
+
+// ----------
+class CompleteFunctionAnalysis {
+ // Current size: 2 pointers
+public:
+ // Has effects which are both the declared ones -- not to be inferred -- plus
+ // ones which have been successfully inferred. These are all considered
+ // "verified" for the purposes of callers; any issue with verifying declared
+ // effects has already been reported and is not the problem of any caller.
+ EffectSet VerifiedEffects;
+
+private:
+ // This is used to generate notes about failed inference.
+ EffectToDiagnosticMap InferrableEffectToFirstDiagnostic;
+
+public:
+ // The incoming Pending analysis is consumed (member(s) are moved-from).
+ CompleteFunctionAnalysis(
+ ASTContext &Ctx, PendingFunctionAnalysis &Pending,
+ const EffectSet &DeclaredEffects,
+ ArrayRef<FunctionEffect> AllInferrableEffectsToVerify) {
+ VerifiedEffects.insert(DeclaredEffects);
+ for (const FunctionEffect &effect : AllInferrableEffectsToVerify)
+ if (Pending.diagnosticForInferrableEffect(effect) == nullptr)
+ VerifiedEffects.insert(effect);
+
+ InferrableEffectToFirstDiagnostic =
+ std::move(Pending.InferrableEffectToFirstDiagnostic);
+ }
+
+ const Diagnostic *firstDiagnosticForEffect(const FunctionEffect &Effect) {
+ return InferrableEffectToFirstDiagnostic.lookup(Effect);
+ }
+
+ void dump(llvm::raw_ostream &OS) const {
+ OS << "Complete: Verified ";
+ VerifiedEffects.dump(OS);
+ OS << "; Infer ";
+ OS << InferrableEffectToFirstDiagnostic.size() << " diags\n";
+ }
+};
+
+const Decl *CanonicalFunctionDecl(const Decl *D) {
+ if (auto *FD = dyn_cast<FunctionDecl>(D)) {
+ FD = FD->getCanonicalDecl();
+ assert(FD != nullptr);
+ return FD;
+ }
+ return D;
+}
+
+// ==========
+class Analyzer {
+ constexpr static int DebugLogLevel = 0;
----------------
dougsonos wrote:
Done, thanks for the pointer!
https://github.com/llvm/llvm-project/pull/99656
More information about the cfe-commits
mailing list