[clang] 87b5807 - [-Wunsafe-buffer-usage] Fix-Its transforming `&DRE[any]` to `&DRE.data()[any]`
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 4 13:27:02 PDT 2023
Author: ziqingluo-90
Date: 2023-04-04T13:26:45-07:00
New Revision: 87b5807d3802b932c06d83c4287014872aa2caab
URL: https://github.com/llvm/llvm-project/commit/87b5807d3802b932c06d83c4287014872aa2caab
DIFF: https://github.com/llvm/llvm-project/commit/87b5807d3802b932c06d83c4287014872aa2caab.diff
LOG: [-Wunsafe-buffer-usage] Fix-Its transforming `&DRE[any]` to `&DRE.data()[any]`
For an expression of the form `&DRE[any]` under an Unspecified
Pointer Context (UPC), we generate a fix-it for it with respect to a
strategy. In case the strategy is `std::span` (it is the only supported
one for now), the fix-it replaces the expression with
`&DRE.data()[any]`.
A UPC includes at least the contexts where
- the expression is being casted to an integer; and
- the expression is an argument of a call to a function that is not marked unsafe.
Reviewed by: NoQ, malavikasamak, t-rasmud, jkorous
Differential revision: https://reviews.llvm.org/D143128
Added:
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
Modified:
clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
clang/lib/Analysis/UnsafeBufferUsage.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 8957ab664ed9..f78cf2c57689 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -43,7 +43,7 @@ class UnsafeBufferUsageHandler {
/// Returns the text indicating that the user needs to provide input there:
virtual std::string
- getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
+ getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const {
std::string s = std::string("<# ");
s += HintTextToUser;
s += " #>";
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index a8485682c1d1..f0c99a767071 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -30,9 +30,10 @@ WARNING_GADGET(Decrement)
WARNING_GADGET(ArraySubscript)
WARNING_GADGET(PointerArithmetic)
WARNING_GADGET(UnsafeBufferUsageAttr)
-FIXABLE_GADGET(ULCArraySubscript)
+FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
FIXABLE_GADGET(PointerDereference)
+FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context
#undef FIXABLE_GADGET
#undef WARNING_GADGET
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 4a8358af68ec..1ef276ab9044 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallVector.h"
#include <memory>
#include <optional>
+#include <sstream>
using namespace llvm;
using namespace clang;
@@ -112,6 +113,15 @@ class MatchDescendantVisitor
bool Matches;
};
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+ return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+ return hasType(hasCanonicalType(arrayType()));
+}
+
AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher<Stmt>, innerMatcher) {
const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
@@ -145,6 +155,30 @@ static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
));
// clang-format off
}
+
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher<Stmt>
+isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
+ // A UPC can be
+ // 1. an argument of a function call (except the callee has [[unsafe_...]]
+ // attribute), or
+ // 2. the operand of a cast operation; or
+ // ...
+ auto CallArgMatcher =
+ callExpr(hasAnyArgument(allOf(
+ hasPointerType() /* array also decays to pointer type*/,
+ InnerMatcher)),
+ unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
+ auto CastOperandMatcher =
+ explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+ castSubExpr(allOf(hasPointerType(), InnerMatcher)));
+
+ return stmt(anyOf(CallArgMatcher, CastOperandMatcher));
+ // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
+ // don't have to check that.)
+}
} // namespace clang::ast_matchers
namespace {
@@ -159,15 +193,6 @@ using FixItList = SmallVector<FixItHint, 4>;
class Strategy;
} // namespace
-// Because we're dealing with raw pointers, let's define what we mean by that.
-static auto hasPointerType() {
- return hasType(hasCanonicalType(pointerType()));
-}
-
-static auto hasArrayType() {
- return hasType(hasCanonicalType(arrayType()));
-}
-
namespace {
/// Gadget is an individual operation in the code that may be of interest to
/// this analysis. Each (non-abstract) subclass corresponds to a specific
@@ -502,6 +527,46 @@ class PointerDereferenceGadget : public FixableGadget {
virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
};
+// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
+// Context (see `isInUnspecifiedPointerContext`).
+// Note here `[]` is the built-in subscript operator.
+class UPCAddressofArraySubscriptGadget : public FixableGadget {
+private:
+ static constexpr const char *const UPCAddressofArraySubscriptTag =
+ "AddressofArraySubscriptUnderUPC";
+ const UnaryOperator *Node; // the `&DRE[any]` node
+
+public:
+ UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+ : FixableGadget(Kind::ULCArraySubscript),
+ Node(Result.Nodes.getNodeAs<UnaryOperator>(
+ UPCAddressofArraySubscriptTag)) {
+ assert(Node != nullptr && "Expecting a non-null matching result");
+ }
+
+ static bool classof(const Gadget *G) {
+ return G->getKind() == Kind::UPCAddressofArraySubscript;
+ }
+
+ static Matcher matcher() {
+ return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
+ unaryOperator(hasOperatorName("&"),
+ hasUnaryOperand(arraySubscriptExpr(
+ hasBase(ignoringParenImpCasts(declRefExpr())))))
+ .bind(UPCAddressofArraySubscriptTag)))));
+ }
+
+ virtual std::optional<FixItList> getFixits(const Strategy &) const override;
+
+ virtual const Stmt *getBaseStmt() const override { return Node; }
+
+ virtual DeclUseList getClaimedVarUseSites() const override {
+ const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
+ const auto *DRE =
+ cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
+ return {DRE};
+ }
+};
} // namespace
namespace {
@@ -710,7 +775,7 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler) {
// We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
// match for the same node, so that we can group them
// in one `anyOf` group (for better performance via short-circuiting).
- stmt(anyOf(
+ stmt(eachOf(
#define FIXABLE_GADGET(x) \
x ## Gadget::matcher().bind(#x),
#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
@@ -869,6 +934,26 @@ ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
return std::nullopt;
}
+static std::optional<FixItList> // forward declaration
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
+
+std::optional<FixItList>
+UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
+ auto DREs = getClaimedVarUseSites();
+ const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
+
+ switch (S.lookup(VD)) {
+ case Strategy::Kind::Span:
+ return fixUPCAddressofArraySubscriptWithSpan(Node);
+ case Strategy::Kind::Wontfix:
+ case Strategy::Kind::Iterator:
+ case Strategy::Kind::Array:
+ case Strategy::Kind::Vector:
+ llvm_unreachable("unsupported strategies for FixableGadgets");
+ }
+ return std::nullopt; // something went wrong, no fix-it
+}
+
// Return the text representation of the given `APInt Val`:
static std::string getAPIntText(APInt Val) {
SmallVector<char> Txt;
@@ -985,6 +1070,35 @@ PointerDereferenceGadget::getFixits(const Strategy &S) const {
return std::nullopt;
}
+// Generates fix-its replacing an expression of the form `&DRE[e]` with
+// `&DRE.data()[e]`:
+static std::optional<FixItList>
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
+ const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
+ const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
+ // FIXME: this `getASTContext` call is costly, we should pass the
+ // ASTContext in:
+ const ASTContext &Ctx = DRE->getDecl()->getASTContext();
+ const Expr *Idx = ArraySub->getIdx();
+ const SourceManager &SM = Ctx.getSourceManager();
+ const LangOptions &LangOpts = Ctx.getLangOpts();
+ std::stringstream SS;
+ bool IdxIsLitZero = false;
+
+ if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
+ if ((*ICE).isZero())
+ IdxIsLitZero = true;
+ if (IdxIsLitZero) {
+ // If the index is literal zero, we produce the most concise fix-it:
+ SS << getExprText(DRE, SM, LangOpts).str() << ".data()";
+ } else {
+ SS << "&" << getExprText(DRE, SM, LangOpts).str() << ".data()"
+ << "[" << getExprText(Idx, SM, LangOpts).str() << "]";
+ }
+ return FixItList{
+ FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
+}
+
// For a non-null initializer `Init` of `T *` type, this function returns
// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
// to output stream.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
new file mode 100644
index 000000000000..db72124741e3
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+ int * p = new int[10];
+ unsigned long n = (unsigned long) &p[5];
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]"
+ unsigned long m = (unsigned long) &p[x];
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[x]"
+}
+
+void call_argument(int x) {
+ int * p = new int[10];
+
+ f((unsigned long) &p[5], &p[x]);
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"&p.data()[5]"
+ // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"&p.data()[x]"
+}
+
+void ignore_unsafe_calls(int x) {
+ // Cannot fix `&p[x]` for now as it is an argument of an unsafe
+ // call. So no fix for variable `p`.
+ int * p = new int[10];
+ // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+ unsafe_f((unsigned long) &p[5],
+ // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+ &p[x]);
+
+ int * q = new int[10];
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span<int> q"
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+ unsafe_f((unsigned long) &q[5],
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"&q.data()[5]"
+ (void*)0);
+}
+
+void odd_subscript_form() {
+ int * p = new int[10];
+ unsigned long n = (unsigned long) &5[p];
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]"
+}
+
+void index_is_zero() {
+ int * p = new int[10];
+ int n = p[5];
+
+ f((unsigned long)&p[0],
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:20-[[@LINE-1]]:25}:"p.data()"
+ &p[0]);
+ // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:5-[[@LINE-1]]:10}:"p.data()"
+}
+
+// CHECK-NOT: fix-it
+// To test multiple function declarations, each of which carries
+//
diff erent incomplete informations:
+[[clang::unsafe_buffer_usage]]
+void unsafe_g(void*);
+
+void unsafe_g(void*);
+
+void multiple_unsafe_fundecls() {
+ int * p = new int[10];
+
+ unsafe_g(&p[5]);
+}
+
+void unsafe_h(void*);
+
+[[clang::unsafe_buffer_usage]]
+void unsafe_h(void*);
+
+void unsafe_h(void* p) { ((char*)p)[10]; }
+
+void multiple_unsafe_fundecls2() {
+ int * p = new int[10];
+
+ unsafe_h(&p[5]);
+}
More information about the cfe-commits
mailing list