[clang] [-Wunsafe-buffer-usage] Add fixits for array to pointer assignment (PR #81343)

via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 14 14:49:15 PST 2024


https://github.com/jkorous-apple updated https://github.com/llvm/llvm-project/pull/81343

>From dfc9d95c185f5228f5c9680a19aa396d20e33d19 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Thu, 25 Jan 2024 13:52:12 -0800
Subject: [PATCH 01/10] [-Wunsafe-buffer-usage] Emit fixits for arguments of
 function pointers calls

Currently we ignore calls on function pointers (unlike direct calls of
functions and class methods). This patch adds support for function pointers as
well.

The change is to simply replace use of forEachArgumentWithParam matcher in UPC
gadget with forEachArgumentWithParamType.

from the documentation of forEachArgumentWithParamType:
/// Matches all arguments and their respective types for a \c CallExpr or
/// \c CXXConstructExpr. It is very similar to \c forEachArgumentWithParam but
/// it works on calls through function pointers as well.

Currently the matcher also uses hasPointerType() which checks that the
canonical type of an argument is pointer and won't match on arrays decayed to
pointer. Replacing hasPointerType() with isAnyPointerType() which allows
implicit casts allows for the arrays to be matched as well and this way we get
fixits for array arguments to function pointer calls too.
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp             |  4 ++--
 ...fer-usage-fixits-pointer-arg-to-func-ptr-call.cpp | 12 ++++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index d00c598c4b9de3..c5a87f14bc8880 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -282,8 +282,8 @@ isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
   //    (i.e., computing the distance between two pointers); or ...
 
   auto CallArgMatcher =
-      callExpr(forEachArgumentWithParam(InnerMatcher,
-                  hasPointerType() /* array also decays to pointer type*/),
+      callExpr(forEachArgumentWithParamType(InnerMatcher,
+                  isAnyPointer() /* array also decays to pointer type*/),
           unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
 
   auto CastOperandMatcher =
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
new file mode 100644
index 00000000000000..ae761e46a98191
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \
+// RUN:            -fsafe-buffer-usage-suggestions \
+// RUN:            -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) {
+  int p[32];
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
+
+  int tmp = p[5];
+  fn_ptr(p);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
+}

>From 44dab965c459f2cd6084ea332f4a6756f57254e0 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Thu, 1 Feb 2024 14:44:01 -0800
Subject: [PATCH 02/10] [-Wunsafe-buffer-usage][NFC] Add tests for function
 pointer call fixits

---
 ...ge-fixits-pointer-arg-to-func-ptr-call.cpp | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
index ae761e46a98191..0459d6549fd86f 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
@@ -6,7 +6,43 @@ void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) {
   int p[32];
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
 
-  int tmp = p[5];
+  p[5] = 10;
   fn_ptr(p);
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
 }
+
+void unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+  int *p;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+  p[5] = 10;
+  fn_ptr(p);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
+}
+
+void addr_of_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+  int *p;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+  p[5] = 10;
+  fn_ptr(&p[0]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"p.data()"
+}
+
+void addr_of_unsafe_ptr_w_offset_func_ptr_call(void (*fn_ptr)(int *param)) {
+  int *p;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+  p[5] = 10;
+  fn_ptr(&p[3]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:15}:"&p.data()[3]"
+}
+
+void preincrement_unsafe_ptr_func_ptr_call(void (*fn_ptr)(int *param)) {
+  int *p;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+  p[5] = 10;
+  fn_ptr(++p);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:13}:"(p = p.subspan(1)).data()"
+}

>From 76a91cf98f5cfbcdad188b57dd9d69e0870b93ed Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Tue, 13 Feb 2024 15:02:53 -0800
Subject: [PATCH 03/10] [-Wunsafe-buffer-usage][NFC] Format AST matcher in
 isInUnspecifiedPointerContext

...and turn off clang-format for the block of AST matchers.
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index c5a87f14bc8880..ca346444e047e5 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -281,10 +281,13 @@ isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
   // 4. the operand of a pointer subtraction operation
   //    (i.e., computing the distance between two pointers); or ...
 
-  auto CallArgMatcher =
-      callExpr(forEachArgumentWithParamType(InnerMatcher,
-                  isAnyPointer() /* array also decays to pointer type*/),
-          unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
+  // clang-format off
+  auto CallArgMatcher = callExpr(
+    forEachArgumentWithParamType(
+      InnerMatcher, 
+      isAnyPointer() /* array also decays to pointer type*/),
+    unless(callee(
+      functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
 
   auto CastOperandMatcher =
       castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral),
@@ -306,6 +309,7 @@ isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
 			   hasRHS(hasPointerType())),
 		     eachOf(hasLHS(InnerMatcher),
 			    hasRHS(InnerMatcher)));
+  // clang-format on
 
   return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
 		    PtrSubtractionMatcher));

>From 9e5964f7ba80ffd3be0f7ed138c090bc8f9fb09b Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Fri, 2 Feb 2024 14:46:59 -0800
Subject: [PATCH 04/10] [-Wunsafe-buffer-usage] Ignore safe array subscripts

Don't emit warnings for array subscripts on constant size arrays where the index is constant and within bounds.

Example:
int arr[10];
arr[5] = 0; //safe, no warning

This patch recognizes only array indices that are integer literals - it doesn't understand more complex expressions (arithmetic on constants, etc.).
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 39 +++++++++++++++----
 .../warn-unsafe-buffer-usage-array.cpp        | 18 ++++++++-
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index ca346444e047e5..d150a679597a92 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -406,6 +406,31 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
   }
   return false;
 }
+
+AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
+  const DeclRefExpr * BaseDRE = dyn_cast_or_null<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
+  if (!BaseDRE)
+    return false;
+  if (!BaseDRE->getDecl())
+    return false;
+  auto BaseVarDeclTy = BaseDRE->getDecl()->getType();
+  if (!BaseVarDeclTy->isConstantArrayType())
+    return false;
+  const auto * CATy = dyn_cast_or_null<ConstantArrayType>(BaseVarDeclTy);
+  if (!CATy)
+    return false;
+  const APInt ArrSize = CATy->getSize();
+
+  if (const auto * IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
+    const APInt ArrIdx = IdxLit->getValue();
+    // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a bug
+    if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < ArrSize.getLimitedValue())
+      return true;
+  }
+
+  return false;
+}
+
 } // namespace clang::ast_matchers
 
 namespace {
@@ -598,16 +623,16 @@ class ArraySubscriptGadget : public WarningGadget {
   }
 
   static Matcher matcher() {
-    // FIXME: What if the index is integer literal 0? Should this be
-    // a safe gadget in this case?
-      // clang-format off
+    // clang-format off
       return stmt(arraySubscriptExpr(
             hasBase(ignoringParenImpCasts(
               anyOf(hasPointerType(), hasArrayType()))),
-            unless(hasIndex(
-                anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
-             )))
-            .bind(ArraySubscrTag));
+            unless(anyOf(
+              isSafeArraySubscript(),
+              hasIndex(
+                  anyOf(integerLiteral(equals(0)), arrayInitIndexExpr())
+              )
+            ))).bind(ArraySubscrTag));
     // clang-format on
   }
 
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
index 90c11b1be95c25..4804223e8be058 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-everything -Wunsafe-buffer-usage \
 // RUN:            -fsafe-buffer-usage-suggestions \
 // RUN:            -verify %s
 
@@ -22,3 +22,19 @@ struct Foo {
 void foo2(Foo& f, unsigned idx) {
   f.member_buffer[idx] = 0; // expected-warning{{unsafe buffer access}}
 }
+
+void constant_idx_safe(unsigned idx) {
+  int buffer[10];
+  buffer[9] = 0;
+}
+
+void constant_idx_safe0(unsigned idx) {
+  int buffer[10];
+  buffer[0] = 0;
+}
+
+void constant_idx_unsafe(unsigned idx) {
+  int buffer[10];       // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}}
+                        // expected-note at -1{{change type of 'buffer' to 'std::array' to harden it}}
+  buffer[10] = 0;       // expected-note{{used in buffer access here}}
+}

>From a82cc25d9d265c0f88a8e9471d69ced193ffb6d5 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Fri, 2 Feb 2024 14:48:41 -0800
Subject: [PATCH 05/10] [-Wunsafe-buffer-usage][NFC] Update existing tests
 after constant safe index is ignored

---
 ...afe-buffer-usage-fixits-pointer-access.cpp |  8 +--
 ...ge-fixits-pointer-arg-to-func-ptr-call.cpp |  3 +-
 .../test/SemaCXX/warn-unsafe-buffer-usage.cpp | 53 ++++++++++---------
 3 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
index f94072015ff87d..b3c64f1b0d085e 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-access.cpp
@@ -83,11 +83,11 @@ void unsafe_method_invocation_single_param() {
 
 }
 
-void unsafe_method_invocation_single_param_array() {
+void unsafe_method_invocation_single_param_array(int idx) {
   int p[32];
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
 
-  int tmp = p[5];
+  int tmp = p[idx];
   foo(p);
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:8-[[@LINE-1]]:8}:".data()"
 }
@@ -126,14 +126,14 @@ void unsafe_method_invocation_double_param() {
   m1(q, q, 8);
 }
 
-void unsafe_method_invocation_double_param_array() {
+void unsafe_method_invocation_double_param_array(int idx) {
   int p[14];
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 14> p"
 
   int q[40];
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 40> q"
 
-  q[5] = p[5];
+  q[idx] = p[idx];
 
   m1(p, p, 10);
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:7}:".data()"
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
index 0459d6549fd86f..216813ce45bfd5 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-arg-to-func-ptr-call.cpp
@@ -6,7 +6,8 @@ void unsafe_array_func_ptr_call(void (*fn_ptr)(int *param)) {
   int p[32];
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::array<int, 32> p"
 
-  p[5] = 10;
+  int idx;
+  p[idx] = 10;
   fn_ptr(p);
   // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:11}:".data()"
 }
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
index 67cdf252d6a8b6..9738b357834eed 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage.cpp
@@ -36,7 +36,7 @@ void testIncrement(char *p) { // expected-warning{{'p' is an unsafe pointer used
 void * voidPtrCall(void);
 char * charPtrCall(void);
 
-void testArraySubscripts(int *p, int **pp) {
+void testArraySubscripts(int idx, int *p, int **pp) {
 // expected-warning at -1{{'p' is an unsafe pointer used for buffer access}}
 // expected-warning at -2{{'pp' is an unsafe pointer used for buffer access}}
   foo(p[1],             // expected-note{{used in buffer access here}}
@@ -64,13 +64,14 @@ void testArraySubscripts(int *p, int **pp) {
                         // expected-note at -1{{change type of 'a' to 'std::array' to label it for hardening}}
     int b[10][10];      // expected-warning{{'b' is an unsafe buffer that does not perform bounds checks}}
 
-  foo(a[1], 1[a],   // expected-note2{{used in buffer access here}}
-      b[3][4],      // expected-warning{{unsafe buffer access}}
-                    // expected-note at -1{{used in buffer access here}}
-      4[b][3],      // expected-warning{{unsafe buffer access}}
-                    // expected-note at -1{{used in buffer access here}}
-      4[3[b]]);     // expected-warning{{unsafe buffer access}}
-                    // expected-note at -1{{used in buffer access here}}
+  foo(a[idx], idx[a],   // expected-note2{{used in buffer access here}}
+      b[idx][idx + 1],  // expected-warning{{unsafe buffer access}}
+                        // expected-note at -1{{used in buffer access here}}
+      (idx + 1)[b][idx],// expected-warning{{unsafe buffer access}}
+                        // expected-note at -1{{used in buffer access here}}
+      (idx + 1)[idx[b]]);
+                        // expected-warning at -1{{unsafe buffer access}}
+                        // expected-note at -2{{used in buffer access here}}
 
   // Not to warn when index is zero
   foo(p[0], pp[0][0], 0[0[pp]], 0[pp][0],
@@ -158,9 +159,9 @@ void testLambdaCaptureAndGlobal(int * p) {
   // expected-warning at -1{{'p' is an unsafe pointer used for buffer access}}
   int a[10];              // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}}
 
-  auto Lam = [p, a]() {
+  auto Lam = [p, a](int idx) {
     return p[1]           // expected-note{{used in buffer access here}}
-      + a[1] + garray[1]  // expected-note2{{used in buffer access here}}
+      + a[idx] + garray[idx]// expected-note2{{used in buffer access here}}
       + gp[1];            // expected-note{{used in buffer access here}}
 
   };
@@ -178,31 +179,31 @@ void testLambdaCapture() {
                           // expected-note at -1{{change type of 'b' to 'std::array' to label it for hardening}}
   int c[10];
 
-  auto Lam1 = [a]() {
-    return a[1];           // expected-note{{used in buffer access here}}
+  auto Lam1 = [a](unsigned idx) {
+    return a[idx];           // expected-note{{used in buffer access here}}
   };
 
-  auto Lam2 = [x = b[3]]() { // expected-note{{used in buffer access here}}
+  auto Lam2 = [x = b[c[5]]]() { // expected-note{{used in buffer access here}}
     return x;
   };
 
-  auto Lam = [x = c]() { // expected-warning{{'x' is an unsafe pointer used for buffer access}}
-    return x[3]; // expected-note{{used in buffer access here}}
+  auto Lam = [x = c](unsigned idx) { // expected-warning{{'x' is an unsafe pointer used for buffer access}}
+    return x[idx]; // expected-note{{used in buffer access here}}
   };
 }
 
-void testLambdaImplicitCapture() {
+void testLambdaImplicitCapture(long idx) {
   int a[10];              // expected-warning{{'a' is an unsafe buffer that does not perform bounds checks}}
                           // expected-note at -1{{change type of 'a' to 'std::array' to label it for hardening}}
   int b[10];              // expected-warning{{'b' is an unsafe buffer that does not perform bounds checks}}
                           // expected-note at -1{{change type of 'b' to 'std::array' to label it for hardening}}
   
   auto Lam1 = [=]() {
-    return a[1];           // expected-note{{used in buffer access here}}
+    return a[idx];           // expected-note{{used in buffer access here}}
   };
   
   auto Lam2 = [&]() {
-    return b[1];           // expected-note{{used in buffer access here}}
+    return b[idx];           // expected-note{{used in buffer access here}}
   };
 }
 
@@ -344,31 +345,31 @@ int testVariableDecls(int * p) {
   return p[1];        // expected-note{{used in buffer access here}}
 }
 
-template<typename T> void fArr(T t[]) {
+template<typename T> void fArr(T t[], long long idx) {
   // expected-warning at -1{{'t' is an unsafe pointer used for buffer access}}
   foo(t[1]);    // expected-note{{used in buffer access here}}
   T ar[8];      // expected-warning{{'ar' is an unsafe buffer that does not perform bounds checks}}
                 // expected-note at -1{{change type of 'ar' to 'std::array' to label it for hardening}}
-  foo(ar[5]);   // expected-note{{used in buffer access here}}
+  foo(ar[idx]);   // expected-note{{used in buffer access here}}
 }
 
-template void fArr<int>(int t[]); // FIXME: expected note {{in instantiation of}}
+template void fArr<int>(int t[], long long); // FIXME: expected note {{in instantiation of}}
 
 int testReturn(int t[]) {// expected-note{{change type of 't' to 'std::span' to preserve bounds information}}
   // expected-warning at -1{{'t' is an unsafe pointer used for buffer access}}
   return t[1]; // expected-note{{used in buffer access here}}
 }
 
-int testArrayAccesses(int n) {
+int testArrayAccesses(int n, int idx) {
     // auto deduced array type
     int cArr[2][3] = {{1, 2, 3}, {4, 5, 6}};
     // expected-warning at -1{{'cArr' is an unsafe buffer that does not perform bounds checks}}
     int d = cArr[0][0];
     foo(cArr[0][0]);
-    foo(cArr[1][2]);        // expected-note{{used in buffer access here}}
-                            // expected-warning at -1{{unsafe buffer access}}
-    auto cPtr = cArr[1][2]; // expected-note{{used in buffer access here}}
-                            // expected-warning at -1{{unsafe buffer access}}
+    foo(cArr[idx][idx + 1]);        // expected-note{{used in buffer access here}}
+                                    // expected-warning at -1{{unsafe buffer access}}
+    auto cPtr = cArr[idx][idx * 2]; // expected-note{{used in buffer access here}}
+                                    // expected-warning at -1{{unsafe buffer access}}
     foo(cPtr);
 
     // Typdefs

>From 1fdef3b4fd7750485fea602d3964c6b8b6e086bb Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Tue, 6 Feb 2024 11:36:08 -0800
Subject: [PATCH 06/10] [-Wunsafe-buffer-usage][NFC] Note future work for
 isSafeArraySubscript

-Warray-bounds implemented in Sema::CheckArrayAccess() already solves a similar
(opposite) problem and is battle-tested.

Adding -Wunsafe-buffer-usage diagnostics to Sema is a non starter as we need to emit
both the warnings and fixits and the performance impact of the fixit machine is
unacceptable for Sema.

CheckArrayAccess() as is doesn't distinguish between "safe" and "unknown" array
accesses. It also mixes the analysis that decides if an index is out of bounds
with crafting the diagnostics.

A refactor of CheckArrayAccess() might serve both the original purpose
and help us avoid false-positive with -Wunsafe-buffer-usage on constant
size arrrays.
---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index d150a679597a92..aa3240a86e562b 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -408,6 +408,12 @@ AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
 }
 
 AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
+  // FIXME: Proper solution:
+  //  - refactor Sema::CheckArrayAccess
+  //    - split safe/OOB/unknown decision logic from diagnostics emitting code
+  //    -  e. g. "Try harder to find a NamedDecl to point at in the note." already duplicated
+  //  - call both from Sema and from here
+
   const DeclRefExpr * BaseDRE = dyn_cast_or_null<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
   if (!BaseDRE)
     return false;

>From ed8933d4a71cfd25552f58a38fc0f0075e14b8ad Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Wed, 14 Feb 2024 14:26:49 -0800
Subject: [PATCH 07/10] [-Wunsafe-buffer-usage][NFC] clang-format the PR

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index aa3240a86e562b..62a82ff1e83c21 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -411,10 +411,12 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   // FIXME: Proper solution:
   //  - refactor Sema::CheckArrayAccess
   //    - split safe/OOB/unknown decision logic from diagnostics emitting code
-  //    -  e. g. "Try harder to find a NamedDecl to point at in the note." already duplicated
+  //    -  e. g. "Try harder to find a NamedDecl to point at in the note."
+  //    already duplicated
   //  - call both from Sema and from here
 
-  const DeclRefExpr * BaseDRE = dyn_cast_or_null<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
+  const DeclRefExpr *BaseDRE =
+      dyn_cast_or_null<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
   if (!BaseDRE)
     return false;
   if (!BaseDRE->getDecl())
@@ -422,15 +424,17 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
   auto BaseVarDeclTy = BaseDRE->getDecl()->getType();
   if (!BaseVarDeclTy->isConstantArrayType())
     return false;
-  const auto * CATy = dyn_cast_or_null<ConstantArrayType>(BaseVarDeclTy);
+  const auto *CATy = dyn_cast_or_null<ConstantArrayType>(BaseVarDeclTy);
   if (!CATy)
     return false;
   const APInt ArrSize = CATy->getSize();
 
-  if (const auto * IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
+  if (const auto *IdxLit = dyn_cast<IntegerLiteral>(Node.getIdx())) {
     const APInt ArrIdx = IdxLit->getValue();
-    // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a bug
-    if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < ArrSize.getLimitedValue())
+    // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
+    // bug
+    if (ArrIdx.isNonNegative() &&
+        ArrIdx.getLimitedValue() < ArrSize.getLimitedValue())
       return true;
   }
 

>From e4c9a966c1635dbe0dec6d8c5cde3fd59488c0bf Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Thu, 8 Feb 2024 14:30:20 -0800
Subject: [PATCH 08/10] [-Wunsafe-buffer-usage][NFC] Factor out .data() fixit
 to a function

---
 clang/lib/Analysis/UnsafeBufferUsage.cpp | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 62a82ff1e83c21..42ff645ac08b25 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -1492,6 +1492,9 @@ PointerAssignmentGadget::getFixits(const FixitStrategy &S) const {
   return std::nullopt;
 }
 
+/// \returns fixit that adds .data() call after \DRE.
+static inline std::optional<FixItList> createDataFixit(const ASTContext& Ctx, const DeclRefExpr * DRE);
+
 std::optional<FixItList>
 PointerInitGadget::getFixits(const FixitStrategy &S) const {
   const auto *LeftVD = PtrInitLHS;
@@ -1909,6 +1912,18 @@ PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
   return std::nullopt;
 }
 
+static inline std::optional<FixItList> createDataFixit(const ASTContext& Ctx, const DeclRefExpr * DRE) {
+  const SourceManager &SM = Ctx.getSourceManager();
+  // Inserts the .data() after the DRE
+  std::optional<SourceLocation> EndOfOperand =
+      getPastLoc(DRE, SM, Ctx.getLangOpts());
+
+  if (EndOfOperand)
+    return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}};
+
+  return std::nullopt;
+}
+
 // Generates fix-its replacing an expression of the form UPC(DRE) with
 // `DRE.data()`
 std::optional<FixItList>
@@ -1917,14 +1932,7 @@ UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {
   switch (S.lookup(VD)) {
   case FixitStrategy::Kind::Array:
   case FixitStrategy::Kind::Span: {
-    ASTContext &Ctx = VD->getASTContext();
-    SourceManager &SM = Ctx.getSourceManager();
-    // Inserts the .data() after the DRE
-    std::optional<SourceLocation> EndOfOperand =
-        getPastLoc(Node, SM, Ctx.getLangOpts());
-
-    if (EndOfOperand)
-      return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}};
+    return createDataFixit(VD->getASTContext(), Node);
     // FIXME: Points inside a macro expansion.
     break;
   }

>From 4fbbedebe6fc8d00ac2b18b0e3151ef54a44ac72 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Thu, 8 Feb 2024 14:33:03 -0800
Subject: [PATCH 09/10] [-Wunsafe-buffer-usage][NFC] Rename PointerAssignment
 gadget to PtrToPtrAssignment

---
 .../Analysis/Analyses/UnsafeBufferUsageGadgets.def    |  2 +-
 clang/lib/Analysis/UnsafeBufferUsage.cpp              | 11 ++++++-----
 clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 07f805ebb11013..2babc1df93d515 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -45,7 +45,7 @@ FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Poin
 FIXABLE_GADGET(UPCStandalonePointer)
 FIXABLE_GADGET(UPCPreIncrement)            // '++Ptr' in an Unspecified Pointer Context
 FIXABLE_GADGET(UUCAddAssign)            // 'Ptr += n' in an Unspecified Untyped Context
-FIXABLE_GADGET(PointerAssignment)
+FIXABLE_GADGET(PtrToPtrAssignment)
 FIXABLE_GADGET(PointerInit)
 
 #undef FIXABLE_GADGET
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 42ff645ac08b25..0a63880be2bc2a 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -801,7 +801,8 @@ class PointerInitGadget : public FixableGadget {
 ///  \code
 ///  p = q;
 ///  \endcode
-class PointerAssignmentGadget : public FixableGadget {
+/// where both `p` and `q` are pointers.
+class PtrToPtrAssignmentGadget : public FixableGadget {
 private:
   static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
   static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
@@ -809,13 +810,13 @@ class PointerAssignmentGadget : public FixableGadget {
   const DeclRefExpr * PtrRHS;         // the RHS pointer expression in `PA`
 
 public:
-  PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
-      : FixableGadget(Kind::PointerAssignment),
+  PtrToPtrAssignmentGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::PtrToPtrAssignment),
     PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
     PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
 
   static bool classof(const Gadget *G) {
-    return G->getKind() == Kind::PointerAssignment;
+    return G->getKind() == Kind::PtrToPtrAssignment;
   }
 
   static Matcher matcher() {
@@ -1473,7 +1474,7 @@ bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
 }
 
 std::optional<FixItList>
-PointerAssignmentGadget::getFixits(const FixitStrategy &S) const {
+PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
   const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
   const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
   switch (S.lookup(LeftVD)) {
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp
index a5b578b98d4e5b..4cc1948f28a773 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-debug.cpp
@@ -53,7 +53,7 @@ void unclaimed_use() {
 void implied_unclaimed_var(int *b) {  // expected-warning{{'b' is an unsafe pointer used for buffer access}}
   int *a = new int[3];  // expected-warning{{'a' is an unsafe pointer used for buffer access}}
   a[4] = 7;  // expected-note{{used in buffer access here}}
-  a = b;  // debug-note{{safe buffers debug: gadget 'PointerAssignment' refused to produce a fix}}
+  a = b;  // debug-note{{safe buffers debug: gadget 'PtrToPtrAssignment' refused to produce a fix}}
   b++;  // expected-note{{used in pointer arithmetic here}} \
         // debug-note{{safe buffers debug: failed to produce fixit for 'b' : has an unclaimed use}}
 }

>From d7b1ae009e0d800e816875b1902a841477bd87ae Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous at apple.com>
Date: Thu, 8 Feb 2024 14:47:13 -0800
Subject: [PATCH 10/10] [-Wunsafe-buffer-usage] Implement fixits for const size
 array assigned to pointer

Introducing CArrayToPtrAssignment gadget and implementing fixits for some cases
of array being assigned to pointer.

Key observations:
- const size array can be assigned to std::span and bounds are propagated
- const size array can't be on LHS of assignment
This means array to pointer assignment has no strategy implications.

Fixits are implemented for cases where one of the variables in the assignment is
safe. For assignment of a safe array to unsafe pointer we know that the RHS will
never be transformed since it's safe and can immediately emit the optimal fixit.
Similarly for assignment of unsafe array to safe pointer.
(Obviously this is not and can't be future-proof in regards to what
variables we consider unsafe and that is fine.)

Fixits for assignment from unsafe array to unsafe pointer (from Array to Span
strategy) are not implemented in this patch as that needs to be properly designed
first - we might possibly implement optimal fixits for partially transformed
cases, put both variables in a single fixit group or do something else.
---
 .../Analyses/UnsafeBufferUsageGadgets.def     |  1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 72 +++++++++++++++++++
 ...uffer-usage-fixits-array-assign-to-ptr.cpp | 43 +++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 2babc1df93d515..3273c642eed517 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -46,6 +46,7 @@ FIXABLE_GADGET(UPCStandalonePointer)
 FIXABLE_GADGET(UPCPreIncrement)            // '++Ptr' in an Unspecified Pointer Context
 FIXABLE_GADGET(UUCAddAssign)            // 'Ptr += n' in an Unspecified Untyped Context
 FIXABLE_GADGET(PtrToPtrAssignment)
+FIXABLE_GADGET(CArrayToPtrAssignment)
 FIXABLE_GADGET(PointerInit)
 
 #undef FIXABLE_GADGET
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 0a63880be2bc2a..2bacad07a364ec 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -7,11 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
@@ -851,6 +854,59 @@ class PtrToPtrAssignmentGadget : public FixableGadget {
   }
 };
 
+/// An assignment expression of the form:
+///  \code
+///  ptr = array;
+///  \endcode
+/// where `p` is a pointer and `array` is a constant size array.
+class CArrayToPtrAssignmentGadget : public FixableGadget {
+private:
+  static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
+  static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
+  const DeclRefExpr * PtrLHS;         // the LHS pointer expression in `PA`
+  const DeclRefExpr * PtrRHS;         // the RHS pointer expression in `PA`
+
+public:
+  CArrayToPtrAssignmentGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::CArrayToPtrAssignment),
+    PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)),
+    PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {}
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::CArrayToPtrAssignment;
+  }
+
+  static Matcher matcher() {
+    auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="),
+      hasRHS(ignoringParenImpCasts(declRefExpr(hasType(hasCanonicalType(constantArrayType())),
+                                               toSupportedVariable()).
+                                   bind(PointerAssignRHSTag))),
+                                   hasLHS(declRefExpr(hasPointerType(),
+                                                      toSupportedVariable()).
+                                          bind(PointerAssignLHSTag))));
+
+    return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr));
+  }
+
+  virtual std::optional<FixItList>
+  getFixits(const FixitStrategy &S) const override;
+
+  virtual const Stmt *getBaseStmt() const override {
+    // FIXME: This should be the binary operator, assuming that this method
+    // makes sense at all on a FixableGadget.
+    return PtrLHS;
+  }
+
+  virtual DeclUseList getClaimedVarUseSites() const override {
+    return DeclUseList{PtrLHS, PtrRHS};
+  }
+
+  virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
+  getStrategyImplications() const override {
+    return {};
+  }
+};
+
 /// A call of a function or method that performs unchecked buffer operations
 /// over one of its pointer parameters.
 class UnsafeBufferUsageAttrGadget : public WarningGadget {
@@ -1496,6 +1552,22 @@ PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
 /// \returns fixit that adds .data() call after \DRE.
 static inline std::optional<FixItList> createDataFixit(const ASTContext& Ctx, const DeclRefExpr * DRE);
 
+std::optional<FixItList>
+CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
+  const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl());
+  const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl());
+  if (S.lookup(LeftVD) == FixitStrategy::Kind::Span) {
+    if (S.lookup(RightVD) == FixitStrategy::Kind::Wontfix) {
+      return FixItList{};
+    }
+  } else if (S.lookup(LeftVD) == FixitStrategy::Kind::Wontfix) {
+    if (S.lookup(RightVD) == FixitStrategy::Kind::Array) {
+      return createDataFixit(RightVD->getASTContext(), PtrRHS);
+    }
+  }
+  return std::nullopt;
+}
+
 std::optional<FixItList>
 PointerInitGadget::getFixits(const FixitStrategy &S) const {
   const auto *LeftVD = PtrInitLHS;
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp
new file mode 100644
index 00000000000000..020a4520e2ccb4
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-array-assign-to-ptr.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage \
+// RUN:            -fsafe-buffer-usage-suggestions \
+// RUN:            -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+void safe_array_assigned_to_safe_ptr(unsigned idx) {
+  int buffer[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+  int* ptr;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+  ptr = buffer;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+}
+
+void safe_array_assigned_to_unsafe_ptr(unsigned idx) {
+  int buffer[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+  int* ptr;
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span<int> ptr"
+  ptr = buffer;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+  ptr[idx] = 0;
+}
+
+void unsafe_array_assigned_to_safe_ptr(unsigned idx) {
+  int buffer[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:17}:"std::array<int, 10> buffer"
+  int* ptr;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:
+  ptr = buffer;
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:15-[[@LINE-1]]:15}:".data()"
+  buffer[idx] = 0;
+}
+
+void unsafe_array_assigned_to_unsafe_ptr(unsigned idx) {
+  int buffer[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}}
+  int* ptr;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}}
+  ptr = buffer;
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]:{{.*}}
+  buffer[idx] = 0;
+  ptr[idx] = 0;
+}



More information about the cfe-commits mailing list