[clang] ca6ceeb - Reland "[-Wunsafe-buffer-usage] Fix-Its transforming `&DRE[any]` to `&DRE.data()[any]`"

Ziqing Luo via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 5 14:54:12 PDT 2023


Author: Ziqing Luo
Date: 2023-04-05T14:54:03-07:00
New Revision: ca6ceeb0d6cd5b8545410d878c8d7bcce9538a3a

URL: https://github.com/llvm/llvm-project/commit/ca6ceeb0d6cd5b8545410d878c8d7bcce9538a3a
DIFF: https://github.com/llvm/llvm-project/commit/ca6ceeb0d6cd5b8545410d878c8d7bcce9538a3a.diff

LOG: Reland "[-Wunsafe-buffer-usage] Fix-Its transforming `&DRE[any]` to `&DRE.data()[any]`"

This commit relands 87b5807d3802b932c06d83c4287014872aa2caab, where a
test fails on a few specific targets.  Now hard-code a target
for the test.

Added: 
    clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp

Modified: 
    clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
    clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
    clang/lib/Analysis/UnsafeBufferUsage.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 8957ab664ed93..f78cf2c57689c 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -43,7 +43,7 @@ class UnsafeBufferUsageHandler {
 
   /// Returns the text indicating that the user needs to provide input there:
   virtual std::string
-  getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
+  getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const {
     std::string s = std::string("<# ");
     s += HintTextToUser;
     s += " #>";

diff  --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index a8485682c1d1f..f0c99a767071f 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -30,9 +30,10 @@ WARNING_GADGET(Decrement)
 WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
-FIXABLE_GADGET(ULCArraySubscript)
+FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
+FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET

diff  --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 4a8358af68ec5..1ef276ab90444 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include <memory>
 #include <optional>
+#include <sstream>
 
 using namespace llvm;
 using namespace clang;
@@ -112,6 +113,15 @@ class MatchDescendantVisitor
   bool Matches;
 };
 
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+    return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+    return hasType(hasCanonicalType(arrayType()));
+}
+  
 AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher<Stmt>, innerMatcher) {
   const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
 
@@ -145,6 +155,30 @@ static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
     ));
 // clang-format off
 }
+
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher<Stmt>
+isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
+  // A UPC can be
+  // 1. an argument of a function call (except the callee has [[unsafe_...]]
+  // attribute), or
+  // 2. the operand of a cast operation; or
+  // ...
+  auto CallArgMatcher =
+      callExpr(hasAnyArgument(allOf(
+                   hasPointerType() /* array also decays to pointer type*/,
+                   InnerMatcher)),
+               unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
+  auto CastOperandMatcher =
+      explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+                       castSubExpr(allOf(hasPointerType(), InnerMatcher)));
+
+ return stmt(anyOf(CallArgMatcher, CastOperandMatcher));
+  // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
+  // don't have to check that.)
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -159,15 +193,6 @@ using FixItList = SmallVector<FixItHint, 4>;
 class Strategy;
 } // namespace
 
-// Because we're dealing with raw pointers, let's define what we mean by that.
-static auto hasPointerType() {
-    return hasType(hasCanonicalType(pointerType()));
-}
-
-static auto hasArrayType() {
-    return hasType(hasCanonicalType(arrayType()));
-}
-
 namespace {
 /// Gadget is an individual operation in the code that may be of interest to
 /// this analysis. Each (non-abstract) subclass corresponds to a specific
@@ -502,6 +527,46 @@ class PointerDereferenceGadget : public FixableGadget {
   virtual std::optional<FixItList> getFixits(const Strategy &S) const override;
 };
 
+// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
+// Context (see `isInUnspecifiedPointerContext`).
+// Note here `[]` is the built-in subscript operator.
+class UPCAddressofArraySubscriptGadget : public FixableGadget {
+private:
+  static constexpr const char *const UPCAddressofArraySubscriptTag =
+      "AddressofArraySubscriptUnderUPC";
+  const UnaryOperator *Node; // the `&DRE[any]` node
+
+public:
+  UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::ULCArraySubscript),
+        Node(Result.Nodes.getNodeAs<UnaryOperator>(
+            UPCAddressofArraySubscriptTag)) {
+    assert(Node != nullptr && "Expecting a non-null matching result");
+  }
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::UPCAddressofArraySubscript;
+  }
+
+  static Matcher matcher() {
+    return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
+        unaryOperator(hasOperatorName("&"),
+                      hasUnaryOperand(arraySubscriptExpr(
+                          hasBase(ignoringParenImpCasts(declRefExpr())))))
+            .bind(UPCAddressofArraySubscriptTag)))));
+  }
+
+  virtual std::optional<FixItList> getFixits(const Strategy &) const override;
+
+  virtual const Stmt *getBaseStmt() const override { return Node; }
+
+  virtual DeclUseList getClaimedVarUseSites() const override {
+    const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr());
+    const auto *DRE =
+        cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts());
+    return {DRE};
+  }
+};
 } // namespace
 
 namespace {
@@ -710,7 +775,7 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler) {
       // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
       // match for the same node, so that we can group them
       // in one `anyOf` group (for better performance via short-circuiting).
-      stmt(anyOf(
+      stmt(eachOf(
 #define FIXABLE_GADGET(x)                                                              \
         x ## Gadget::matcher().bind(#x),
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
@@ -869,6 +934,26 @@ ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
   return std::nullopt;
 }
 
+static std::optional<FixItList> // forward declaration
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
+
+std::optional<FixItList>
+UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
+  auto DREs = getClaimedVarUseSites();
+  const auto *VD = cast<VarDecl>(DREs.front()->getDecl());
+
+  switch (S.lookup(VD)) {
+  case Strategy::Kind::Span:
+    return fixUPCAddressofArraySubscriptWithSpan(Node);
+  case Strategy::Kind::Wontfix:
+  case Strategy::Kind::Iterator:
+  case Strategy::Kind::Array:
+  case Strategy::Kind::Vector:
+    llvm_unreachable("unsupported strategies for FixableGadgets");
+  }
+  return std::nullopt; // something went wrong, no fix-it
+}
+
 // Return the text representation of the given `APInt Val`:
 static std::string getAPIntText(APInt Val) {
   SmallVector<char> Txt;
@@ -985,6 +1070,35 @@ PointerDereferenceGadget::getFixits(const Strategy &S) const {
   return std::nullopt;
 }
 
+// Generates fix-its replacing an expression of the form `&DRE[e]` with
+// `&DRE.data()[e]`:
+static std::optional<FixItList>
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
+  const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr());
+  const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts());
+  // FIXME: this `getASTContext` call is costly, we should pass the
+  // ASTContext in:
+  const ASTContext &Ctx = DRE->getDecl()->getASTContext();
+  const Expr *Idx = ArraySub->getIdx();
+  const SourceManager &SM = Ctx.getSourceManager();
+  const LangOptions &LangOpts = Ctx.getLangOpts();
+  std::stringstream SS;
+  bool IdxIsLitZero = false;
+
+  if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
+    if ((*ICE).isZero())
+      IdxIsLitZero = true;
+  if (IdxIsLitZero) {
+    // If the index is literal zero, we produce the most concise fix-it:
+    SS << getExprText(DRE, SM, LangOpts).str() << ".data()";
+  } else {
+    SS << "&" << getExprText(DRE, SM, LangOpts).str() << ".data()"
+       << "[" << getExprText(Idx, SM, LangOpts).str() << "]";
+  }
+  return FixItList{
+      FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
+}
+
 // For a non-null initializer `Init` of `T *` type, this function returns
 // `FixItHint`s producing a list initializer `{Init,  S}` as a part of a fix-it
 // to output stream.

diff  --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
new file mode 100644
index 0000000000000..8fdbc4bed4f6d
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple=arm-apple -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &p[5];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]"
+  unsigned long m = (unsigned long) &p[x];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[x]"
+}
+
+void call_argument(int x) {
+  int * p = new int[10];
+
+  f((unsigned long) &p[5], &p[x]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"&p.data()[5]"
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"&p.data()[x]"
+}
+
+void ignore_unsafe_calls(int x) {
+  // Cannot fix `&p[x]` for now as it is an argument of an unsafe
+  // call. So no fix for variable `p`.
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_f((unsigned long) &p[5],
+	   // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+	   &p[x]);
+
+  int * q = new int[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span<int> q"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+  unsafe_f((unsigned long) &q[5],
+	   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"&q.data()[5]"
+	   (void*)0);
+}
+
+void odd_subscript_form() {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &5[p];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"&p.data()[5]"
+}
+
+void index_is_zero() {
+  int * p = new int[10];
+  int n = p[5];
+
+  f((unsigned long)&p[0],
+    // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:20-[[@LINE-1]]:25}:"p.data()"
+    &p[0]);
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:5-[[@LINE-1]]:10}:"p.data()"
+}
+
+// CHECK-NOT: fix-it
+// To test multiple function declarations, each of which carries
+// 
diff erent incomplete informations:
+[[clang::unsafe_buffer_usage]]
+void unsafe_g(void*);
+
+void unsafe_g(void*);
+
+void multiple_unsafe_fundecls() {
+  int * p = new int[10];
+
+  unsafe_g(&p[5]);
+}
+
+void unsafe_h(void*);
+
+[[clang::unsafe_buffer_usage]]
+void unsafe_h(void*);
+
+void unsafe_h(void* p) { ((char*)p)[10]; }
+
+void multiple_unsafe_fundecls2() {
+  int * p = new int[10];
+
+  unsafe_h(&p[5]);
+}


        


More information about the cfe-commits mailing list