[clang] 0d00a97 - [-Wunsafe-buffer-usage] NFC: Introduce an abstraction for fixable code patterns.
Artem Dergachev via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 16 15:02:41 PST 2022
Author: Artem Dergachev
Date: 2022-12-16T15:02:22-08:00
New Revision: 0d00a9722f3ce9d314ddd26a33e22921956b7519
URL: https://github.com/llvm/llvm-project/commit/0d00a9722f3ce9d314ddd26a33e22921956b7519
DIFF: https://github.com/llvm/llvm-project/commit/0d00a9722f3ce9d314ddd26a33e22921956b7519.diff
LOG: [-Wunsafe-buffer-usage] NFC: Introduce an abstraction for fixable code patterns.
This patch introduces a hierarchy of Gadget classes, which are going to be
the core concept in the somewhat sophisticated machine behind the upcoming
-Wunsafe-buffer-usage fix-it hints.
A gadget is a rigid code pattern that constitutes an operation that
the fixit machine "understands" well enough to work with. A gadget may be
"unsafe" (i.e., constitute a raw buffer access) or "safe" (constitute
a use of a pointer or array variable that isn't unsafe per se,
but may need fixing if the participating variable changes type
to a safe container/view).
We'll be able to make decisions about how to fix the code depending on
the set of gadgets that we've enumerated. Basically, in order to fix a type
of a variable, each use of that variable has to participate in a gadget,
and each such gadget has to consent to fixing itself according to
the variable's new type.
Differential Revision: https://reviews.llvm.org/D137348
Added:
clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
Modified:
clang/lib/Analysis/UnsafeBufferUsage.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
new file mode 100644
index 0000000000000..e2d3cbe0e3955
--- /dev/null
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -0,0 +1,33 @@
+//=- UnsafeBufferUsageGadgets.def - List of ways to use a buffer --*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// A gadget is an individual operation in the code that may be of interest to
+/// the UnsafeBufferUsage analysis.
+#ifndef GADGET
+#define GADGET(name)
+#endif
+
+/// Unsafe gadgets correspond to unsafe code patterns that warrant
+/// an immediate warning.
+#ifndef UNSAFE_GADGET
+#define UNSAFE_GADGET(name) GADGET(name)
+#endif
+
+/// Safe gadgets correspond to code patterns that aren't unsafe but need to be
+/// properly recognized in order to emit correct warnings and fixes over unsafe
+/// gadgets.
+#ifndef SAFE_GADGET
+#define SAFE_GADGET(name) GADGET(name)
+#endif
+
+UNSAFE_GADGET(Increment)
+UNSAFE_GADGET(Decrement)
+
+#undef SAFE_GADGET
+#undef UNSAFE_GADGET
+#undef GADGET
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 5e8ff29d51c38..42824358b65e4 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -14,11 +14,123 @@ using namespace llvm;
using namespace clang;
using namespace ast_matchers;
-namespace {
-// TODO: Better abstractions over gadgets.
-using GadgetList = std::vector<const Stmt *>;
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+ return anyOf(hasType(pointerType()),
+ hasType(autoType(hasDeducedType(
+ hasUnqualifiedDesugaredType(pointerType())))));
}
+namespace {
+/// Gadget is an individual operation in the code that may be of interest to
+/// this analysis. Each (non-abstract) subclass corresponds to a specific
+/// rigid AST structure that constitutes an operation on a pointer-type object.
+/// Discovery of a gadget in the code corresponds to claiming that we understand
+/// what this part of code is doing well enough to potentially improve it.
+/// Gadgets can be unsafe (immediately deserving a warning) or safe (not
+/// deserving a warning per se, but affecting our decision-making process
+/// nonetheless).
+class Gadget {
+public:
+ enum class Kind {
+#define GADGET(x) x,
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+ };
+
+ /// Common type of ASTMatchers used for discovering gadgets.
+ /// Useful for implementing the static matcher() methods
+ /// that are expected from all non-abstract subclasses.
+ using Matcher = decltype(stmt());
+
+ Gadget(Kind K) : K(K) {}
+
+ Kind getKind() const { return K; }
+
+ virtual bool isSafe() const = 0;
+ virtual const Stmt *getBaseStmt() const = 0;
+
+ virtual ~Gadget() = default;
+
+private:
+ Kind K;
+};
+
+using GadgetList = std::vector<std::unique_ptr<Gadget>>;
+
+/// Unsafe gadgets correspond to unsafe code patterns that warrants
+/// an immediate warning.
+class UnsafeGadget : public Gadget {
+public:
+ UnsafeGadget(Kind K) : Gadget(K) {}
+
+ static bool classof(const Gadget *G) { return G->isSafe(); }
+ bool isSafe() const final { return false; }
+};
+
+/// Safe gadgets correspond to code patterns that aren't unsafe but need to be
+/// properly recognized in order to emit correct warnings and fixes over unsafe
+/// gadgets. For example, if a raw pointer-type variable is replaced by
+/// a safe C++ container, every use of such variable may need to be
+/// carefully considered and possibly updated.
+class SafeGadget : public Gadget {
+public:
+ SafeGadget(Kind K) : Gadget(K) {}
+
+ static bool classof(const Gadget *G) { return !G->isSafe(); }
+ bool isSafe() const final { return true; }
+};
+
+/// An increment of a pointer-type value is unsafe as it may run the pointer
+/// out of bounds.
+class IncrementGadget : public UnsafeGadget {
+ static constexpr const char *const OpTag = "op";
+ const UnaryOperator *Op;
+
+public:
+ IncrementGadget(const MatchFinder::MatchResult &Result)
+ : UnsafeGadget(Kind::Increment),
+ Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
+
+ static bool classof(const Gadget *G) {
+ return G->getKind() == Kind::Increment;
+ }
+
+ static Matcher matcher() {
+ return stmt(unaryOperator(
+ hasOperatorName("++"),
+ hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
+ ).bind(OpTag));
+ }
+
+ const UnaryOperator *getBaseStmt() const override { return Op; }
+};
+
+/// A decrement of a pointer-type value is unsafe as it may run the pointer
+/// out of bounds.
+class DecrementGadget : public UnsafeGadget {
+ static constexpr const char *const OpTag = "op";
+ const UnaryOperator *Op;
+
+public:
+ DecrementGadget(const MatchFinder::MatchResult &Result)
+ : UnsafeGadget(Kind::Decrement),
+ Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {}
+
+ static bool classof(const Gadget *G) {
+ return G->getKind() == Kind::Decrement;
+ }
+
+ static Matcher matcher() {
+ return stmt(unaryOperator(
+ hasOperatorName("--"),
+ hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))
+ ).bind(OpTag));
+ }
+
+ const UnaryOperator *getBaseStmt() const override { return Op; }
+};
+} // namespace
+
// Scan the function and return a list of gadgets found with provided kits.
static GadgetList findGadgets(const Decl *D) {
@@ -29,40 +141,56 @@ static GadgetList findGadgets(const Decl *D) {
GadgetFinderCallback(GadgetList &Output) : Output(Output) {}
void run(const MatchFinder::MatchResult &Result) override {
- Output.push_back(Result.Nodes.getNodeAs<Stmt>("root_node"));
+ // In debug mode, assert that we've found exactly one gadget.
+ // This helps us avoid conflicts in .bind() tags.
+#if NDEBUG
+#define NEXT return
+#else
+ int numFound = 0;
+#define NEXT ++numFound
+#endif
+
+ // Figure out which matcher we've found, and call the appropriate
+ // subclass constructor.
+ // FIXME: Can we do this more logarithmically?
+#define GADGET(name) \
+ if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
+ Output.push_back(std::make_unique<name ## Gadget>(Result)); \
+ NEXT; \
+ }
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+
+ assert(numFound >= 1 && "Gadgets not found in match result!");
+ assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
+ (void)numFound;
}
};
GadgetList G;
MatchFinder M;
-
- auto IncrementMatcher = unaryOperator(
- hasOperatorName("++"),
- hasUnaryOperand(hasType(pointerType()))
- );
- auto DecrementMatcher = unaryOperator(
- hasOperatorName("--"),
- hasUnaryOperand(hasType(pointerType()))
- );
-
GadgetFinderCallback CB(G);
+ // clang-format off
M.addMatcher(
- stmt(forEachDescendant(
- stmt(
- anyOf(
- IncrementMatcher,
- DecrementMatcher
- /* Fill me in! */
- )
- // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
- // here, to make sure that the statement actually belongs to the
- // function and not to a nested function. However, forCallable uses
- // ParentMap which can't be used before the AST is fully constructed.
- // The original problem doesn't sound like it needs ParentMap though,
- // maybe there's a more direct solution?
- ).bind("root_node")
- )), &CB);
+ stmt(forEachDescendant(
+ stmt(anyOf(
+ // Add Gadget::matcher() for every gadget in the registry.
+#define GADGET(x) \
+ x ## Gadget::matcher().bind(#x),
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+ // FIXME: Is there a better way to avoid hanging comma?
+ unless(stmt())
+ ))
+ // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
+ // here, to make sure that the statement actually belongs to the
+ // function and not to a nested function. However, forCallable uses
+ // ParentMap which can't be used before the AST is fully constructed.
+ // The original problem doesn't sound like it needs ParentMap though,
+ // maybe there's a more direct solution?
+ )),
+ &CB
+ );
+ // clang-format on
M.match(*D->getBody(), D->getASTContext());
@@ -73,8 +201,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
UnsafeBufferUsageHandler &Handler) {
assert(D && D->getBody());
- GadgetList G = findGadgets(D);
- for (const Stmt *S : G) {
- Handler.handleUnsafeOperation(S);
+ GadgetList Gadgets = findGadgets(D);
+ for (const auto &G : Gadgets) {
+ Handler.handleUnsafeOperation(G->getBaseStmt());
}
}
More information about the cfe-commits
mailing list