[clang] d9ee935 - [Sema] Address-space sensitive check for unbounded arrays (v2)

Chris Hamilton via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 29 07:15:23 PDT 2020


Author: Chris Hamilton
Date: 2020-09-29T16:14:48+02:00
New Revision: d9ee935679e7164d1c47e351bbbcf5c25742b59c

URL: https://github.com/llvm/llvm-project/commit/d9ee935679e7164d1c47e351bbbcf5c25742b59c
DIFF: https://github.com/llvm/llvm-project/commit/d9ee935679e7164d1c47e351bbbcf5c25742b59c.diff

LOG: [Sema] Address-space sensitive check for unbounded arrays (v2)

Check applied to unbounded (incomplete) arrays and pointers to spot
cases where the computed address is beyond the largest possible
addressable extent of the array, based on the address space in which the
array is delcared, or which the pointer refers to.

Check helps to avoid cases of nonsense pointer math and array indexing
which could lead to linker failures or runtime exceptions.  Of
particular interest when building for embedded systems with small
address spaces.

This is version 2 of this patch -- version 1 had some testing issues
due to a sign error in existing code.  That error is corrected and
lit test for this chagne is extended to verify the fix.

Originally reviewed/accepted by: aaron.ballman
Original revision: https://reviews.llvm.org/D86796

Reviewed By: ebevhan

Differential Revision: https://reviews.llvm.org/D88174

Added: 
    clang/test/Sema/unbounded-array-bounds.c

Modified: 
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Sema/SemaChecking.cpp
    clang/test/Sema/const-eval.c
    clang/test/SemaCXX/constant-expression-cxx1y.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 42d50426ccd8..8f6c7b9400fa 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8917,6 +8917,14 @@ def warn_array_index_precedes_bounds : Warning<
 def warn_array_index_exceeds_bounds : Warning<
   "array index %0 is past the end of the array (which contains %1 "
   "element%s2)">, InGroup<ArrayBounds>;
+def warn_ptr_arith_exceeds_max_addressable_bounds : Warning<
+  "the pointer incremented by %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
+def warn_array_index_exceeds_max_addressable_bounds : Warning<
+  "array index %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
 def note_array_declared_here : Note<
   "array %0 declared here">;
 

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index eeb322262400..a5de6a5c88db 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -14057,11 +14057,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
   const ConstantArrayType *ArrayTy =
       Context.getAsConstantArrayType(BaseExpr->getType());
 
-  if (!ArrayTy)
-    return;
-
-  const Type *BaseType = ArrayTy->getElementType().getTypePtr();
-  if (EffectiveType->isDependentType() || BaseType->isDependentType())
+  const Type *BaseType =
+      ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr();
+  bool IsUnboundedArray = (BaseType == nullptr);
+  if (EffectiveType->isDependentType() ||
+      (!IsUnboundedArray && BaseType->isDependentType()))
     return;
 
   Expr::EvalResult Result;
@@ -14069,8 +14069,10 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
     return;
 
   llvm::APSInt index = Result.Val.getInt();
-  if (IndexNegated)
+  if (IndexNegated) {
+    index.setIsUnsigned(false);
     index = -index;
+  }
 
   const NamedDecl *ND = nullptr;
   if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
@@ -14078,6 +14080,69 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
   if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
     ND = ME->getMemberDecl();
 
+  if (IsUnboundedArray) {
+    if (index.isUnsigned() || !index.isNegative()) {
+      const auto &ASTC = getASTContext();
+      unsigned AddrBits =
+          ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace(
+              EffectiveType->getCanonicalTypeInternal()));
+      if (index.getBitWidth() < AddrBits)
+        index = index.zext(AddrBits);
+      CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType);
+      llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity());
+      // If index has more active bits than address space, we already know
+      // we have a bounds violation to warn about.  Otherwise, compute
+      // address of (index + 1)th element, and warn about bounds violation
+      // only if that address exceeds address space.
+      if (index.getActiveBits() <= AddrBits) {
+        bool Overflow;
+        llvm::APInt Product(index);
+        Product += 1;
+        Product = Product.umul_ov(ElemBytes, Overflow);
+        if (!Overflow && Product.getActiveBits() <= AddrBits)
+          return;
+      }
+
+      // Need to compute max possible elements in address space, since that
+      // is included in diag message.
+      llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits);
+      MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth()));
+      MaxElems += 1;
+      ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth());
+      MaxElems = MaxElems.udiv(ElemBytes);
+
+      unsigned DiagID =
+          ASE ? diag::warn_array_index_exceeds_max_addressable_bounds
+              : diag::warn_ptr_arith_exceeds_max_addressable_bounds;
+
+      // Diag message shows element size in bits and in "bytes" (platform-
+      // dependent CharUnits)
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+                          PDiag(DiagID)
+                              << index.toString(10, true) << AddrBits
+                              << (unsigned)ASTC.toBits(ElemCharUnits)
+                              << ElemBytes.toString(10, false)
+                              << MaxElems.toString(10, false)
+                              << (unsigned)MaxElems.getLimitedValue(~0U)
+                              << IndexExpr->getSourceRange());
+
+      if (!ND) {
+        // Try harder to find a NamedDecl to point at in the note.
+        while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
+          BaseExpr = ASE->getBase()->IgnoreParenCasts();
+        if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
+          ND = DRE->getDecl();
+        if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
+          ND = ME->getMemberDecl();
+      }
+
+      if (ND)
+        DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr,
+                            PDiag(diag::note_array_declared_here) << ND);
+    }
+    return;
+  }
+
   if (index.isUnsigned() || !index.isNegative()) {
     // It is possible that the type of the base expression after
     // IgnoreParenCasts is incomplete, even though the type of the base
@@ -14140,9 +14205,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
       }
     }
 
-    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
-    if (ASE)
-      DiagID = diag::warn_array_index_exceeds_bounds;
+    unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds
+                          : diag::warn_ptr_arith_exceeds_bounds;
 
     DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
                         PDiag(DiagID) << index.toString(10, true)
@@ -14163,12 +14227,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
 
   if (!ND) {
     // Try harder to find a NamedDecl to point at in the note.
-    while (const ArraySubscriptExpr *ASE =
-           dyn_cast<ArraySubscriptExpr>(BaseExpr))
+    while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
       BaseExpr = ASE->getBase()->IgnoreParenCasts();
-    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
       ND = DRE->getDecl();
-    if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
+    if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
       ND = ME->getMemberDecl();
   }
 

diff  --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c
index bbcbb0e25237..c94539ab1de2 100644
--- a/clang/test/Sema/const-eval.c
+++ b/clang/test/Sema/const-eval.c
@@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con
 
 // We evaluate these by providing 2s' complement semantics in constant
 // expressions, like we do for integers.
-void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;
-void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;
-__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c;
-void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];
+void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;                  // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}}
+void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;                  // expected-warning {{refers past the last possible element}}
+__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}}
+void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];                // expected-warning {{refers past the last possible element}}
 
 struct PR35214_X {
   int k;

diff  --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c
new file mode 100644
index 000000000000..d47463ff9434
--- /dev/null
+++ b/clang/test/Sema/unbounded-array-bounds.c
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu   -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple avr-pc-linux-gnu    -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+
+struct S {
+  long long a;
+  char b;
+  long long c;
+  short d;
+};
+
+struct S s[];
+
+void f1() {
+  ++s[3].a;
+  ++s[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++s[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+long long ll[];
+
+void f2() {
+  ++ll[3];
+  ++ll[2705843009213693952];
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+  ++ll[847073650];
+  // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+}
+
+void f3(struct S p[]) {
+  ++p[3].a;
+  ++p[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++p[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+void f4(struct S *p) {
+  p += 3;
+  p += 7073650413200313099;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  p += 7073650;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+struct BQ {
+  struct S bigblock[3276];
+};
+
+struct BQ bq[];
+
+void f5() {
+  ++bq[0].bigblock[0].a;
+  ++bq[1].bigblock[0].a;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element)
+}
+
+void f6() {
+  int ints[] = {1, 3, 5, 7, 8, 6, 4, 5, 9};
+  int const n_ints = sizeof(ints) / sizeof(int);
+  unsigned long long const N = 3;
+
+  int *middle = &ints[0] + n_ints / 2;
+  // Should NOT produce a warning.
+  *(middle + 5 - N) = 22;
+}

diff  --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
index 8bc4f88a63a9..7fe71d485350 100644
--- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1018,8 +1018,9 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant
 }
 
 constexpr void PR28739(int n) { // expected-error {{never produces a constant}}
-  int *p = &n;
+  int *p = &n;                  // expected-note {{declared here}}
   p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}}
+  // expected-warning at -1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}}
 }
 
 constexpr void Void(int n) {


        


More information about the cfe-commits mailing list