[clang] nonblocking/nonallocating attributes: 2nd pass caller/callee analysis (PR #99656)

Fri Jul 26 13:53:42 PDT 2024

================
@@ -2397,6 +2397,1262 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
 };
 } // namespace
 
+// =============================================================================
+
+namespace FXAnalysis {
+
+enum class DiagnosticID : uint8_t {
+  None = 0, // sentinel for an empty Diagnostic
+  Throws,
+  Catches,
+  CallsObjC,
+  AllocatesMemory,
+  HasStaticLocal,
+  AccessesThreadLocal,
+
+  // These only apply to callees, where the analysis stops at the Decl
+  DeclDisallowsInference,
+
+  CallsDeclWithoutEffect,
+  CallsExprWithoutEffect,
+};
+
+// Holds an effect diagnosis, potentially for the entire duration of the
+// analysis phase, in order to refer to it when explaining why a caller has been
+// made unsafe by a callee.
+struct Diagnostic {
+  FunctionEffect Effect;
+  DiagnosticID ID = DiagnosticID::None;
+  SourceLocation Loc;
+  const Decl *Callee = nullptr; // only valid for Calls*
+
+  Diagnostic() = default;
+
+  Diagnostic(const FunctionEffect &Effect, DiagnosticID ID, SourceLocation Loc,
+             const Decl *Callee = nullptr)
+      : Effect(Effect), ID(ID), Loc(Loc), Callee(Callee) {}
+};
+
+enum class SpecialFuncType : uint8_t { None, OperatorNew, OperatorDelete };
+enum class CallType {
+  // unknown: probably function pointer
+  Unknown,
+  Function,
+  Virtual,
+  Block
+};
+
+// Return whether a function's effects CAN be verified.
+// The question of whether it SHOULD be verified is independent.
+static bool functionIsVerifiable(const FunctionDecl *FD) {
+  if (!(FD->hasBody() || FD->isInlined())) {
+    // externally defined; we couldn't verify if we wanted to.
+    return false;
+  }
+  if (FD->isTrivial()) {
+    // Otherwise `struct x { int a; };` would have an unverifiable default
+    // constructor.
+    return true;
+  }
+  return true;
+}
+
+/// A mutable set of FunctionEffect, for use in places where any conditions
+/// have been resolved or can be ignored.
+class EffectSet {
+  // This implementation optimizes footprint, since we hold one of these for
+  // every function visited, which, due to inference, can be many more functions
+  // than have declared effects.
+
+  template <typename T, typename SizeT, SizeT Capacity> struct FixedVector {
+    SizeT Count = 0;
+    T Items[Capacity] = {};
+
+    using value_type = T;
+
+    using iterator = T *;
+    using const_iterator = const T *;
+    iterator begin() { return &Items[0]; }
+    iterator end() { return &Items[Count]; }
+    const_iterator begin() const { return &Items[0]; }
+    const_iterator end() const { return &Items[Count]; }
+    const_iterator cbegin() const { return &Items[0]; }
+    const_iterator cend() const { return &Items[Count]; }
+
+    void insert(iterator I, const T &Value) {
+      assert(Count < Capacity);
+      iterator E = end();
+      if (I != E)
+        std::copy_backward(I, E, E + 1);
+      *I = Value;
+      ++Count;
+    }
+
+    void push_back(const T &Value) {
+      assert(Count < Capacity);
+      Items[Count++] = Value;
+    }
+  };
+
+  // As long as FunctionEffect is only 1 byte, and there are only 2 verifiable
+  // effects, this fixed-size vector with a capacity of 7 is more than
+  // sufficient and is only 8 bytes.
+  FixedVector<FunctionEffect, uint8_t, 7> Impl;
+
+public:
+  EffectSet() = default;
+  explicit EffectSet(FunctionEffectsRef FX) { insert(FX); }
+
+  operator ArrayRef<FunctionEffect>() const {
+    return ArrayRef(Impl.cbegin(), Impl.cend());
+  }
+
+  using iterator = const FunctionEffect *;
+  iterator begin() const { return Impl.cbegin(); }
+  iterator end() const { return Impl.cend(); }
+
+  void insert(const FunctionEffect &Effect) {
+    FunctionEffect *Iter = Impl.begin();
+    FunctionEffect *End = Impl.end();
+    // linear search; lower_bound is overkill for a tiny vector like this
+    for (; Iter != End; ++Iter) {
+      if (*Iter == Effect)
+        return;
+      if (Effect < *Iter)
+        break;
+    }
+    Impl.insert(Iter, Effect);
+  }
+  void insert(const EffectSet &Set) {
+    for (const FunctionEffect &Item : Set) {
+      // push_back because set is already sorted
+      Impl.push_back(Item);
+    }
+  }
+  void insert(FunctionEffectsRef FX) {
+    for (const FunctionEffectWithCondition &EC : FX) {
+      assert(EC.Cond.getCondition() ==
+             nullptr); // should be resolved by now, right?
+      // push_back because set is already sorted
+      Impl.push_back(EC.Effect);
+    }
+  }
+  bool contains(const FunctionEffect::Kind EK) const {
+    for (const FunctionEffect &E : Impl)
+      if (E.kind() == EK)
+        return true;
+    return false;
+  }
+
+  void dump(llvm::raw_ostream &OS) const;
+
+  static EffectSet difference(ArrayRef<FunctionEffect> LHS,
+                              ArrayRef<FunctionEffect> RHS) {
+    EffectSet Result;
+    std::set_difference(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
+                        std::back_inserter(Result.Impl));
+    return Result;
+  }
+};
+
+LLVM_DUMP_METHOD void EffectSet::dump(llvm::raw_ostream &OS) const {
+  OS << "Effects{";
+  bool First = true;
+  for (const FunctionEffect &Effect : *this) {
+    if (!First)
+      OS << ", ";
+    else
+      First = false;
+    OS << Effect.name();
+  }
+  OS << "}";
+}
+
+// Transitory, more extended information about a callable, which can be a
+// function, block, function pointer, etc.
+struct CallableInfo {
+  // CDecl holds the function's definition, if any.
+  // FunctionDecl if CallType::Function or Virtual
+  // BlockDecl if CallType::Block
+  const Decl *CDecl;
+  mutable std::optional<std::string> MaybeName;
----------------
Sirraide wrote:

I was mostly thinking about making a separate function that does all of that and returns the call type that we call whenever we need it instead of storing it. Caching might make sense if it’s likely that we’ll often be using it multiple times per `CallbleInfo`.

https://github.com/llvm/llvm-project/pull/99656