[clang] 2709998 - [NFC][analyzer] Framework for multipart checkers (#130985)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 17 02:20:01 PDT 2025
Author: DonĂ¡t Nagy
Date: 2025-03-17T10:19:57+01:00
New Revision: 27099982da2f5a6c2d282d6b385e79d080669546
URL: https://github.com/llvm/llvm-project/commit/27099982da2f5a6c2d282d6b385e79d080669546
DIFF: https://github.com/llvm/llvm-project/commit/27099982da2f5a6c2d282d6b385e79d080669546.diff
LOG: [NFC][analyzer] Framework for multipart checkers (#130985)
In the static analyzer codebase we have a traditional pattern where a
single checker class (and its singleton instance) acts as the
implementation of several (user-facing or modeling) checkers that have
shared state and logic, but have their own names and can be enabled or
disabled separately.
Currently these multipart checker classes all reimplement the same
boilerplate logic to store the enabled/disabled state, the name and the
bug types associated with the checker parts. This commit extends
`CheckerBase`, `BugType` and the checker registration process to offer
an easy-to-use alternative to that boilerplate (which includes the ugly
lazy initialization of `mutable std::unique_ptr<BugType>`s).
In this new framework the single-part checkers are internally
represented as "multipart checkers with just one part" (because this way
I don't need to reimplement the same logic twice) but this does not
require any changes in the code of simple single-part checkers.
I do not claim that these multi-part checkers are perfect from an
architectural point of view; but they won't suddenly disappear after
many years of existence, so we might as well introduce a clear framework
for them. (Switching to e.g. 1:1 correspondence between checker classes
and checker names would be a prohibitively complex change.)
This PR ports `DivZeroChecker` to the new framework as a proof of
concept. I'm planning to do a series of follow-up commits to port the
rest of the multi-part checker.
Added:
Modified:
clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
clang/include/clang/StaticAnalyzer/Core/Checker.h
clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
clang/lib/StaticAnalyzer/Core/Checker.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
index 97cf7eda0ae2f..3a635e0d0125a 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
@@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include <string>
+#include <variant>
namespace clang {
@@ -26,36 +27,53 @@ class BugReporter;
class BugType {
private:
- const CheckerNameRef CheckerName;
+ struct CheckerPartRef {
+ const CheckerBase *Checker;
+ CheckerPartIdx Idx;
+ };
+ using CheckerNameInfo = std::variant<CheckerNameRef, CheckerPartRef>;
+
+ const CheckerNameInfo CheckerName;
const std::string Description;
const std::string Category;
- const CheckerBase *Checker;
bool SuppressOnSink;
virtual void anchor();
public:
+ // Straightforward constructor where the checker name is specified directly.
+ // TODO: As far as I know all applications of this constructor involve ugly
+ // hacks that could be avoided by switching to a
diff erent constructor.
+ // When those are all eliminated, this constructor should be removed to
+ // eliminate the `variant` and simplify this class.
BugType(CheckerNameRef CheckerName, StringRef Desc,
StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
: CheckerName(CheckerName), Description(Desc), Category(Cat),
- Checker(nullptr), SuppressOnSink(SuppressOnSink) {}
+ SuppressOnSink(SuppressOnSink) {}
+ // Constructor that can be called from the constructor of a checker object.
+ // At that point the checker name is not yet available, but we can save a
+ // pointer to the checker and later use that to query the name.
BugType(const CheckerBase *Checker, StringRef Desc,
StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
- : CheckerName(), Description(Desc), Category(Cat), Checker(Checker),
- SuppressOnSink(SuppressOnSink) {}
+ : CheckerName(CheckerPartRef{Checker, DefaultPart}), Description(Desc),
+ Category(Cat), SuppressOnSink(SuppressOnSink) {}
+ // Constructor that can be called from the constructor of a checker object
+ // when it has multiple parts with separate names. We save the name and the
+ // part index to be able to query the name of that part later.
+ BugType(const CheckerBase *Checker, CheckerPartIdx Idx, StringRef Desc,
+ StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
+ : CheckerName(CheckerPartRef{Checker, Idx}), Description(Desc),
+ Category(Cat), SuppressOnSink(SuppressOnSink) {}
virtual ~BugType() = default;
StringRef getDescription() const { return Description; }
StringRef getCategory() const { return Category; }
StringRef getCheckerName() const {
- // FIXME: This is a workaround to ensure that the correct checker name is
- // used. The checker names are set after the constructors are run.
- // In case the BugType object is initialized in the checker's ctor
- // the CheckerName field will be empty. To circumvent this problem we use
- // CheckerBase whenever it is possible.
- StringRef Ret = Checker ? Checker->getName() : CheckerName;
- assert(!Ret.empty() && "Checker name is not set properly.");
- return Ret;
+ if (const auto *CNR = std::get_if<CheckerNameRef>(&CheckerName))
+ return *CNR;
+
+ auto [Checker, Idx] = std::get<CheckerPartRef>(CheckerName);
+ return Checker->getName(Idx);
}
/// isSuppressOnSink - Returns true if bug reports associated with this bug
diff --git a/clang/include/clang/StaticAnalyzer/Core/Checker.h b/clang/include/clang/StaticAnalyzer/Core/Checker.h
index 4d9b33cc559b8..df29be8ba3f62 100644
--- a/clang/include/clang/StaticAnalyzer/Core/Checker.h
+++ b/clang/include/clang/StaticAnalyzer/Core/Checker.h
@@ -485,16 +485,60 @@ class Call {
} // end eval namespace
class CheckerBase : public ProgramPointTag {
- CheckerNameRef Name;
+ /// A single checker class (i.e. a subclass of `CheckerBase`) can implement
+ /// multiple user-facing checkers that have separate names and can be enabled
+ /// separately, but are backed by the same singleton checker object.
+ SmallVector<std::optional<CheckerNameRef>, 1> RegisteredNames;
+
friend class ::clang::ento::CheckerManager;
public:
- StringRef getTagDescription() const override;
- CheckerNameRef getName() const;
+ CheckerNameRef getName(CheckerPartIdx Idx = DefaultPart) const {
+ assert(Idx < RegisteredNames.size() && "Checker part index is too large!");
+ std::optional<CheckerNameRef> Name = RegisteredNames[Idx];
+ assert(Name && "Requested checker part is not registered!");
+ return *Name;
+ }
+
+ bool isPartEnabled(CheckerPartIdx Idx) const {
+ return Idx < RegisteredNames.size() && RegisteredNames[Idx].has_value();
+ }
+
+ void registerCheckerPart(CheckerPartIdx Idx, CheckerNameRef Name) {
+ // Paranoia: notice if e.g. UINT_MAX is passed as a checker part index.
+ assert(Idx < 256 && "Checker part identifiers should be small integers.");
+
+ if (Idx >= RegisteredNames.size())
+ RegisteredNames.resize(Idx + 1, std::nullopt);
+
+ assert(!RegisteredNames[Idx] && "Repeated registration of checker a part!");
+ RegisteredNames[Idx] = Name;
+ }
+
+ StringRef getTagDescription() const override {
+ // This method inherited from `ProgramPointTag` has two unrelated roles:
+ // (1) The analyzer option handling logic uses this method to query the
+ // name of a checker.
+ // (2) When the `ExplodedGraph` is dumped in DOT format for debugging,
+ // this is called to attach a description to the nodes. (This happens
+ // for all subclasses of `ProgramPointTag`, not just checkers.)
+ // FIXME: Application (1) should be aware of multiple parts within the same
+ // checker class instance, so it should directly use `getName` instead of
+ // this inherited interface which cannot support a `CheckerPartIdx`.
+ // FIXME: Ideally application (2) should return a string that describes the
+ // whole checker class, not just one of it parts. However, this is only for
+ // debugging, so returning the name of one part is probably good enough.
+ for (const auto &OptName : RegisteredNames)
+ if (OptName)
+ return *OptName;
+
+ return "Unregistered checker";
+ }
- /// See CheckerManager::runCheckersForPrintState.
+ /// Debug state dump callback, see CheckerManager::runCheckersForPrintState.
+ /// Default implementation does nothing.
virtual void printState(raw_ostream &Out, ProgramStateRef State,
- const char *NL, const char *Sep) const { }
+ const char *NL, const char *Sep) const;
};
/// Dump checker name to stream.
diff --git a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
index 929ce96824e95..44ae3ebe3d09d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
@@ -116,6 +116,19 @@ class CheckerNameRef {
operator StringRef() const { return Name; }
};
+/// A single checker class (and its singleton instance) can act as the
+/// implementation of several (user-facing or modeling) checker parts that
+/// have shared state and logic, but have their own names and can be enabled or
+/// disabled separately.
+/// Each checker class that implement multiple parts introduces its own enum
+/// type to assign small numerical indices (0, 1, 2 ...) to their parts. The
+/// type alias 'CheckerPartIdx' is conceptually the union of these enum types.
+using CheckerPartIdx = unsigned;
+
+/// If a checker doesn't have multiple parts, then its single part is
+/// represented by this index.
+constexpr inline CheckerPartIdx DefaultPart = 0;
+
enum class ObjCMessageVisitKind {
Pre,
Post,
@@ -190,23 +203,38 @@ class CheckerManager {
// Checker registration.
//===--------------------------------------------------------------------===//
- /// Used to register checkers.
- /// All arguments are automatically passed through to the checker
- /// constructor.
+ /// Construct the singleton instance of a checker, register it for the
+ /// supported callbacks and record its name with `registerCheckerPart()`.
+ /// Arguments passed to this function are forwarded to the constructor of the
+ /// checker.
+ ///
+ /// If `CHECKER` has multiple parts, then the constructor call and the
+ /// callback registration only happen within the first `registerChecker()`
+ /// call; while the subsequent calls only enable additional parts of the
+ /// existing checker object (while registering their names).
///
/// \returns a pointer to the checker object.
- template <typename CHECKER, typename... AT>
- CHECKER *registerChecker(AT &&... Args) {
+ template <typename CHECKER, CheckerPartIdx Idx = DefaultPart, typename... AT>
+ CHECKER *registerChecker(AT &&...Args) {
+ // This assert could be removed but then we need to make sure that calls
+ // registering
diff erent parts of the same checker pass the same arguments.
+ static_assert(
+ Idx == DefaultPart || !sizeof...(AT),
+ "Argument forwarding isn't supported with multi-part checkers!");
+
CheckerTag Tag = getTag<CHECKER>();
- std::unique_ptr<CheckerBase> &Ref = CheckerTags[Tag];
- assert(!Ref && "Checker already registered, use getChecker!");
- std::unique_ptr<CHECKER> Checker =
- std::make_unique<CHECKER>(std::forward<AT>(Args)...);
- Checker->Name = CurrentCheckerName;
- CHECKER::_register(Checker.get(), *this);
- Ref = std::move(Checker);
- return static_cast<CHECKER *>(Ref.get());
+ std::unique_ptr<CheckerBase> &Ref = CheckerTags[Tag];
+ if (!Ref) {
+ std::unique_ptr<CHECKER> Checker =
+ std::make_unique<CHECKER>(std::forward<AT>(Args)...);
+ CHECKER::_register(Checker.get(), *this);
+ Ref = std::move(Checker);
+ }
+
+ CHECKER *Result = static_cast<CHECKER *>(Ref.get());
+ Result->registerCheckerPart(Idx, CurrentCheckerName);
+ return Result;
}
template <typename CHECKER>
diff --git a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
index 7c8b44eb05942..3dd57732305b2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -34,10 +34,14 @@ class DivZeroChecker : public Checker<check::PreStmt<BinaryOperator>> {
public:
/// This checker class implements several user facing checkers
- enum CheckKind { CK_DivideZero, CK_TaintedDivChecker, CK_NumCheckKinds };
- bool ChecksEnabled[CK_NumCheckKinds] = {false};
- CheckerNameRef CheckNames[CK_NumCheckKinds];
- mutable std::unique_ptr<BugType> BugTypes[CK_NumCheckKinds];
+ enum : CheckerPartIdx {
+ DivideZeroChecker,
+ TaintedDivChecker,
+ NumCheckerParts
+ };
+ BugType BugTypes[NumCheckerParts] = {
+ {this, DivideZeroChecker, "Division by zero"},
+ {this, TaintedDivChecker, "Division by zero", categories::TaintedData}};
void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
};
@@ -52,14 +56,11 @@ static const Expr *getDenomExpr(const ExplodedNode *N) {
void DivZeroChecker::reportBug(StringRef Msg, ProgramStateRef StateZero,
CheckerContext &C) const {
- if (!ChecksEnabled[CK_DivideZero])
+ if (!isPartEnabled(DivideZeroChecker))
return;
- if (!BugTypes[CK_DivideZero])
- BugTypes[CK_DivideZero].reset(
- new BugType(CheckNames[CK_DivideZero], "Division by zero"));
if (ExplodedNode *N = C.generateErrorNode(StateZero)) {
- auto R = std::make_unique<PathSensitiveBugReport>(*BugTypes[CK_DivideZero],
- Msg, N);
+ auto R = std::make_unique<PathSensitiveBugReport>(
+ BugTypes[DivideZeroChecker], Msg, N);
bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
C.emitReport(std::move(R));
}
@@ -68,15 +69,11 @@ void DivZeroChecker::reportBug(StringRef Msg, ProgramStateRef StateZero,
void DivZeroChecker::reportTaintBug(
StringRef Msg, ProgramStateRef StateZero, CheckerContext &C,
llvm::ArrayRef<SymbolRef> TaintedSyms) const {
- if (!ChecksEnabled[CK_TaintedDivChecker])
+ if (!isPartEnabled(TaintedDivChecker))
return;
- if (!BugTypes[CK_TaintedDivChecker])
- BugTypes[CK_TaintedDivChecker].reset(
- new BugType(CheckNames[CK_TaintedDivChecker], "Division by zero",
- categories::TaintedData));
if (ExplodedNode *N = C.generateNonFatalErrorNode(StateZero)) {
auto R = std::make_unique<PathSensitiveBugReport>(
- *BugTypes[CK_TaintedDivChecker], Msg, N);
+ BugTypes[TaintedDivChecker], Msg, N);
bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
for (auto Sym : TaintedSyms)
R->markInteresting(Sym);
@@ -129,28 +126,16 @@ void DivZeroChecker::checkPreStmt(const BinaryOperator *B,
C.addTransition(stateNotZero);
}
-void ento::registerDivZeroChecker(CheckerManager &mgr) {
- DivZeroChecker *checker = mgr.registerChecker<DivZeroChecker>();
- checker->ChecksEnabled[DivZeroChecker::CK_DivideZero] = true;
- checker->CheckNames[DivZeroChecker::CK_DivideZero] =
- mgr.getCurrentCheckerName();
+void ento::registerDivZeroChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<DivZeroChecker, DivZeroChecker::DivideZeroChecker>();
}
-bool ento::shouldRegisterDivZeroChecker(const CheckerManager &mgr) {
- return true;
-}
+bool ento::shouldRegisterDivZeroChecker(const CheckerManager &) { return true; }
-void ento::registerTaintedDivChecker(CheckerManager &mgr) {
- DivZeroChecker *checker;
- if (!mgr.isRegisteredChecker<DivZeroChecker>())
- checker = mgr.registerChecker<DivZeroChecker>();
- else
- checker = mgr.getChecker<DivZeroChecker>();
- checker->ChecksEnabled[DivZeroChecker::CK_TaintedDivChecker] = true;
- checker->CheckNames[DivZeroChecker::CK_TaintedDivChecker] =
- mgr.getCurrentCheckerName();
+void ento::registerTaintedDivChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<DivZeroChecker, DivZeroChecker::TaintedDivChecker>();
}
-bool ento::shouldRegisterTaintedDivChecker(const CheckerManager &mgr) {
+bool ento::shouldRegisterTaintedDivChecker(const CheckerManager &) {
return true;
}
diff --git a/clang/lib/StaticAnalyzer/Core/Checker.cpp b/clang/lib/StaticAnalyzer/Core/Checker.cpp
index d93db6ddfc3bf..2bbb7a541457b 100644
--- a/clang/lib/StaticAnalyzer/Core/Checker.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Checker.cpp
@@ -18,9 +18,8 @@ using namespace ento;
int ImplicitNullDerefEvent::Tag;
-StringRef CheckerBase::getTagDescription() const { return getName(); }
-
-CheckerNameRef CheckerBase::getName() const { return Name; }
+void CheckerBase::printState(raw_ostream &Out, ProgramStateRef State,
+ const char *NL, const char *Sep) const {}
CheckerProgramPointTag::CheckerProgramPointTag(StringRef CheckerName,
StringRef Msg)
More information about the cfe-commits
mailing list