[clang] Warning for unsafe invocation of span::data (PR #75650)

Malavika Samak via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 18 15:33:50 PST 2023


https://github.com/malavikasamak updated https://github.com/llvm/llvm-project/pull/75650

>From 809bf6f4237f634feaeb7e5b0b88be3a2e4de455 Mon Sep 17 00:00:00 2001
From: MalavikaSamak <malavika2 at apple.com>
Date: Fri, 15 Dec 2023 11:40:55 -0800
Subject: [PATCH 1/2] While refactoring projects to eradicate unsafe buffer
 accesses using -Wunsafe-buffer-usage, there maybe accidental re-introduction
 of new OutOfBound accesses into the code bases. One such case is invoking
 span::data() method on a span variable to retrieve a pointer, which is then
 cast to a larger type and dereferenced. Such dereferences can introduce
 OutOfBound accesses.

To address this, a new WarningGadget is being introduced to warn against such invocations.
---
 .../Analysis/Analyses/UnsafeBufferUsage.h     |   2 +-
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 +-
 clang/lib/Analysis/UnsafeBufferUsage.cpp      |  37 ++++-
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  19 ++-
 ...e-buffer-usage-warning-data-invocation.cpp | 133 ++++++++++++++++++
 6 files changed, 186 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
index 8a2d56668e32f9..b28f2c6b99c50e 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -66,7 +66,7 @@ class UnsafeBufferUsageHandler {
 
   /// Invoked when an unsafe operation over raw pointers is found.
   virtual void handleUnsafeOperation(const Stmt *Operation,
-                                     bool IsRelatedToDecl) = 0;
+                                     bool IsRelatedToDecl, ASTContext &Ctx) = 0;
 
   /// Invoked when a fix is suggested against a variable. This function groups
   /// all variables that must be fixed together (i.e their types must be changed
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 757ee452ced748..c9766168836510 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -30,6 +30,7 @@ WARNING_GADGET(Decrement)
 WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
+WARNING_GADGET(DataInvocation)
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
 FIXABLE_GADGET(PointerDereference)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 94e97a891baedc..9038dd879f54ae 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12058,7 +12058,7 @@ def warn_unsafe_buffer_variable : Warning<
   InGroup<UnsafeBufferUsage>, DefaultIgnore;
 def warn_unsafe_buffer_operation : Warning<
   "%select{unsafe pointer operation|unsafe pointer arithmetic|"
-  "unsafe buffer access|function introduces unsafe buffer manipulation}0">,
+  "unsafe buffer access|function introduces unsafe buffer manipulation|unsafe invocation of span::data}0">,
   InGroup<UnsafeBufferUsage>, DefaultIgnore;
 def note_unsafe_buffer_operation : Note<
   "used%select{| in pointer arithmetic| in buffer access}0 here">;
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 70eec1cee57f8e..e4eca9939b10d5 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -721,6 +721,33 @@ class UnsafeBufferUsageAttrGadget : public WarningGadget {
   DeclUseList getClaimedVarUseSites() const override { return {}; }
 };
 
+// Warning gadget for unsafe invocation of span::data method.
+// Triggers when the pointer returned by the invocation is immediately
+// cast to a larger type.
+
+class DataInvocationGadget : public WarningGadget {
+  constexpr static const char *const OpTag = "data_invocation_expr";
+  const ExplicitCastExpr *Op;
+
+  public:
+  DataInvocationGadget(const MatchFinder::MatchResult &Result)
+      : WarningGadget(Kind::DataInvocation),
+        Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::DataInvocation;
+  }
+ 
+  static Matcher matcher() {
+    return stmt(
+        explicitCastExpr(has(cxxMemberCallExpr(callee(
+               cxxMethodDecl(hasName("data")))))).bind(OpTag));
+  }   
+  const Stmt *getBaseStmt() const override { return Op; }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `isInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
@@ -2657,8 +2684,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
     // every problematic operation and consider it done. No need to deal
     // with fixable gadgets, no need to group operations by variable.
     for (const auto &G : WarningGadgets) {
-      Handler.handleUnsafeOperation(G->getBaseStmt(),
-                                    /*IsRelatedToDecl=*/false);
+      Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false, 
+                                    D->getASTContext());
     }
 
     // This return guarantees that most of the machine doesn't run when
@@ -2893,7 +2920,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
                   Tracker, Handler, VarGrpMgr);
 
   for (const auto &G : UnsafeOps.noVar) {
-    Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false);
+    Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false
+                                  , D->getASTContext());
   }
 
   for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
@@ -2904,7 +2932,8 @@ void clang::checkUnsafeBufferUsage(const Decl *D,
                                       : FixItList{},
                                       D);
     for (const auto &G : WarningGadgets) {
-      Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true);
+      Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true
+                                    , D->getASTContext());
     }
   }
 }
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 0947e8b0f526a3..4f8e181806957e 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2226,8 +2226,8 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
   UnsafeBufferUsageReporter(Sema &S, bool SuggestSuggestions)
     : S(S), SuggestSuggestions(SuggestSuggestions) {}
 
-  void handleUnsafeOperation(const Stmt *Operation,
-                             bool IsRelatedToDecl) override {
+  void handleUnsafeOperation(const Stmt *Operation, bool IsRelatedToDecl, 
+                             ASTContext &Ctx) override {
     SourceLocation Loc;
     SourceRange Range;
     unsigned MsgParam = 0;
@@ -2261,6 +2261,21 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
         // note_unsafe_buffer_operation doesn't have this mode yet.
         assert(!IsRelatedToDecl && "Not implemented yet!");
         MsgParam = 3;
+      } else if (const auto *ECE = dyn_cast<ExplicitCastExpr>(Operation)) {
+        QualType destType = ECE->getType();
+        const uint64_t dSize = Ctx.getTypeSize(destType.getTypePtr()->getPointeeType());
+        if(const auto *CE =dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) {
+
+          if(CE->getRecordDecl()->getQualifiedNameAsString().compare("std::span"))
+           return;
+         
+          QualType srcType = CE->getType();
+          const uint64_t sSize = Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType());
+          if(sSize >= dSize)
+            return;
+        }
+        MsgParam = 4;
+ 
       }
       Loc = Operation->getBeginLoc();
       Range = Operation->getSourceRange();
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp
new file mode 100644
index 00000000000000..5cce91ae4753de
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -std=c++20  -Wno-all -Wunsafe-buffer-usage \
+// RUN:            -fsafe-buffer-usage-suggestions \
+// RUN:            -fblocks -include %s -verify %s
+
+// RUN: %clang -x c++ -frtti -fsyntax-only -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s
+// RUN: %clang_cc1 -std=c++11 -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s
+// RUN: %clang_cc1 -std=c++20 -fblocks -include %s %s 2>&1 | FileCheck --allow-empty %s
+// CHECK-NOT: [-Wunsafe-buffer-usage]
+
+#ifndef INCLUDED
+#define INCLUDED
+#pragma clang system_header
+
+// no spanification warnings for system headers
+void foo(...);  // let arguments of `foo` to hold testing expressions
+#else
+
+namespace std {
+  class type_info;
+  class bad_cast;
+  class bad_typeid;
+}
+using size_t = __typeof(sizeof(int));
+void *malloc(size_t);
+
+void foo(int v) {
+}
+
+void foo(int *p){}
+
+namespace std{
+  template <typename T> class span {
+
+  T *elements;
+ 
+  span(T *, unsigned){}
+
+  public:
+
+  constexpr span<T> subspan(size_t offset, size_t count) const {
+    return span<T> (elements+offset, count); // expected-warning{{unsafe pointer arithmetic}}
+  }
+
+  constexpr T* data() const noexcept {
+    return elements;
+  }
+
+ 
+  constexpr T* hello() const noexcept {
+   return elements;
+  }
+};
+ 
+ template <typename T> class span_duplicate {
+  span_duplicate(T *, unsigned){}
+
+  T array[10];
+
+  public:
+
+  T* data() {
+    return array;
+  }
+
+};
+}
+
+class span {
+
+ int array[10];
+ 
+ public:
+
+ int *data() {
+   return array;
+ }
+};
+
+class A {
+  int a, b, c;
+};
+
+struct Base {
+   virtual ~Base() = default;
+};
+
+struct Derived: Base {
+  int d;
+};
+
+void cast_without_data(int *ptr) {
+ A *a = (A*) ptr;
+ float *p = (float*) ptr;
+}
+
+void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span) {
+    int *p;
+    A *a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
+    a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
+   
+    // TODO:: Should we warn when we cast from base to derived type?
+    Derived *b = dynamic_cast<Derived*> (base_span.data());// expected-warning{{unsafe invocation of span::data}}
+
+   // TODO:: This pattern is safe. We can add special handling for it, if we decide this
+   // is the recommended fixit for the unsafe invocations.
+   a = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}}
+}
+
+void not_warned_patterns(std::span<A> span_ptr, std::span<Base> base_span) {
+    int *p = (int*)span_ptr.data();
+    p = (int*)span_ptr.data();
+    A *a = (A*) span_ptr.hello();
+}
+
+// We do not want to warn about other types
+void other_classes(std::span_duplicate<int> span_ptr, span sp) {
+    int *p;
+    A *a = (A*)span_ptr.data();
+    a = (A*)span_ptr.data(); 
+
+    a = (A*)sp.data();
+}
+
+// Potential source for false negatives
+
+void false_negatives(std::span<int> span_pt) {
+  int *ptr = span_pt.data();
+  A *a = (A*)ptr; //TODO: We want to warn here eventually.
+
+  int k = *ptr; // TODO: Can cause OOB if span_pt is empty
+
+}
+#endif

>From e80f03a07287297b1d7965301b2a1914823a4265 Mon Sep 17 00:00:00 2001
From: MalavikaSamak <malavika2 at apple.com>
Date: Mon, 18 Dec 2023 15:32:42 -0800
Subject: [PATCH 2/2] Addressing some comments and format issues.

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp      |  2 +-
 clang/lib/Sema/AnalysisBasedWarnings.cpp      |  5 --
 ...e-buffer-usage-warning-data-invocation.cpp | 46 +++++++++----------
 3 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index e4eca9939b10d5..5e3bd7dfb54d4b 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -741,7 +741,7 @@ class DataInvocationGadget : public WarningGadget {
   static Matcher matcher() {
     return stmt(
         explicitCastExpr(has(cxxMemberCallExpr(callee(
-               cxxMethodDecl(hasName("data")))))).bind(OpTag));
+               cxxMethodDecl(hasName("data"), ofClass(hasName("std::span"))))))).bind(OpTag));
   }   
   const Stmt *getBaseStmt() const override { return Op; }
 
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 4f8e181806957e..60406064bb7e9d 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2265,17 +2265,12 @@ class UnsafeBufferUsageReporter : public UnsafeBufferUsageHandler {
         QualType destType = ECE->getType();
         const uint64_t dSize = Ctx.getTypeSize(destType.getTypePtr()->getPointeeType());
         if(const auto *CE =dyn_cast<CXXMemberCallExpr>(ECE->getSubExpr())) {
-
-          if(CE->getRecordDecl()->getQualifiedNameAsString().compare("std::span"))
-           return;
-         
           QualType srcType = CE->getType();
           const uint64_t sSize = Ctx.getTypeSize(srcType.getTypePtr()->getPointeeType());
           if(sSize >= dSize)
             return;
         }
         MsgParam = 4;
- 
       }
       Loc = Operation->getBeginLoc();
       Range = Operation->getSourceRange();
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp
index 5cce91ae4753de..79eb3bb4bacc6e 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-warning-data-invocation.cpp
@@ -12,7 +12,6 @@
 #pragma clang system_header
 
 // no spanification warnings for system headers
-void foo(...);  // let arguments of `foo` to hold testing expressions
 #else
 
 namespace std {
@@ -65,18 +64,13 @@ namespace std{
 };
 }
 
-class span {
+using namespace std;
 
- int array[10];
- 
- public:
-
- int *data() {
-   return array;
- }
+class A {
+  int a, b, c;
 };
 
-class A {
+class B {
   int a, b, c;
 };
 
@@ -93,41 +87,45 @@ void cast_without_data(int *ptr) {
  float *p = (float*) ptr;
 }
 
-void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span) {
-    int *p;
-    A *a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
-    a = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
+void warned_patterns(std::span<int> span_ptr, std::span<Base> base_span, span<int> span_without_qual) {
+    A *a1 = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
+    a1 = (A*)span_ptr.data(); // expected-warning{{unsafe invocation of span::data}}
+  
+    A *a2 = (A*) span_without_qual.data(); // expected-warning{{unsafe invocation of span::data}}
    
     // TODO:: Should we warn when we cast from base to derived type?
     Derived *b = dynamic_cast<Derived*> (base_span.data());// expected-warning{{unsafe invocation of span::data}}
 
    // TODO:: This pattern is safe. We can add special handling for it, if we decide this
    // is the recommended fixit for the unsafe invocations.
-   a = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}}
+   A *a3 = (A*)span_ptr.subspan(0, sizeof(A)).data(); // expected-warning{{unsafe invocation of span::data}}
 }
 
 void not_warned_patterns(std::span<A> span_ptr, std::span<Base> base_span) {
-    int *p = (int*)span_ptr.data();
-    p = (int*)span_ptr.data();
-    A *a = (A*) span_ptr.hello();
+    int *p = (int*) span_ptr.data(); // Cast to a smaller type
+  
+    B *b = (B*) span_ptr.data(); // Cast to a type of same size.
+
+    p = (int*) span_ptr.data();
+    A *a = (A*) span_ptr.hello(); // Invoking other methods.
 }
 
 // We do not want to warn about other types
-void other_classes(std::span_duplicate<int> span_ptr, span sp) {
+void other_classes(std::span_duplicate<int> span_ptr) {
     int *p;
     A *a = (A*)span_ptr.data();
     a = (A*)span_ptr.data(); 
-
-    a = (A*)sp.data();
 }
 
 // Potential source for false negatives
 
-void false_negatives(std::span<int> span_pt) {
+A false_negatives(std::span<int> span_pt, span<A> span_A) {
   int *ptr = span_pt.data();
-  A *a = (A*)ptr; //TODO: We want to warn here eventually.
 
-  int k = *ptr; // TODO: Can cause OOB if span_pt is empty
+  A *a1 = (A*)ptr; //TODO: We want to warn here eventually.
+
+  A *a2= span_A.data();
+  return *a2; // TODO: Can cause OOB if span_pt is empty
 
 }
 #endif



More information about the cfe-commits mailing list