[clang] da55e9b - [Sema] Address-space sensitive index check for unbounded arrays

Chris Hamilton via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 14 16:15:06 PDT 2020


Author: Chris Hamilton
Date: 2020-09-14T18:13:19-05:00
New Revision: da55e9ba1273284f1af61bceeaeb25e487838034

URL: https://github.com/llvm/llvm-project/commit/da55e9ba1273284f1af61bceeaeb25e487838034
DIFF: https://github.com/llvm/llvm-project/commit/da55e9ba1273284f1af61bceeaeb25e487838034.diff

LOG: [Sema] Address-space sensitive index check for unbounded arrays

Check applied to unbounded (incomplete) arrays and pointers
to spot cases where the computed address is beyond the
largest possible addressable extent of the array, based
on the address space in which the array is delcared, or
which the pointer refers to.

Check helps to avoid cases of nonsense pointer math and
array indexing which could lead to linker failures or
runtime exceptions.  Of particular interest when building
for embedded systems with small address spaces.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D86796

Added: 
    clang/test/Sema/unbounded-array-bounds.c

Modified: 
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Sema/SemaChecking.cpp
    clang/test/Sema/const-eval.c
    clang/test/SemaCXX/constant-expression-cxx1y.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e0d700c66724..e0be2072bb6e 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8847,6 +8847,14 @@ def warn_array_index_precedes_bounds : Warning<
 def warn_array_index_exceeds_bounds : Warning<
   "array index %0 is past the end of the array (which contains %1 "
   "element%s2)">, InGroup<ArrayBounds>;
+def warn_ptr_arith_exceeds_max_addressable_bounds : Warning<
+  "the pointer incremented by %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
+def warn_array_index_exceeds_max_addressable_bounds : Warning<
+  "array index %0 refers past the last possible element for an array in %1-bit "
+  "address space containing %2-bit (%3-byte) elements (max possible %4 element%s5)">,
+  InGroup<ArrayBounds>;
 def note_array_declared_here : Note<
   "array %0 declared here">;
 

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f2b70be1d431..dbfa329993c8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -14038,11 +14038,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
   const ConstantArrayType *ArrayTy =
       Context.getAsConstantArrayType(BaseExpr->getType());
 
-  if (!ArrayTy)
-    return;
-
-  const Type *BaseType = ArrayTy->getElementType().getTypePtr();
-  if (EffectiveType->isDependentType() || BaseType->isDependentType())
+  const Type *BaseType =
+      ArrayTy == nullptr ? nullptr : ArrayTy->getElementType().getTypePtr();
+  bool IsUnboundedArray = (BaseType == nullptr);
+  if (EffectiveType->isDependentType() ||
+      (!IsUnboundedArray && BaseType->isDependentType()))
     return;
 
   Expr::EvalResult Result;
@@ -14059,6 +14059,69 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
   if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
     ND = ME->getMemberDecl();
 
+  if (IsUnboundedArray) {
+    if (index.isUnsigned() || !index.isNegative()) {
+      const auto &ASTC = getASTContext();
+      unsigned AddrBits =
+          ASTC.getTargetInfo().getPointerWidth(ASTC.getTargetAddressSpace(
+              EffectiveType->getCanonicalTypeInternal()));
+      if (index.getBitWidth() < AddrBits)
+        index = index.zext(AddrBits);
+      CharUnits ElemCharUnits = ASTC.getTypeSizeInChars(EffectiveType);
+      llvm::APInt ElemBytes(index.getBitWidth(), ElemCharUnits.getQuantity());
+      // If index has more active bits than address space, we already know
+      // we have a bounds violation to warn about.  Otherwise, compute
+      // address of (index + 1)th element, and warn about bounds violation
+      // only if that address exceeds address space.
+      if (index.getActiveBits() <= AddrBits) {
+        bool Overflow;
+        llvm::APInt Product(index);
+        Product += 1;
+        Product = Product.umul_ov(ElemBytes, Overflow);
+        if (!Overflow && Product.getActiveBits() <= AddrBits)
+          return;
+      }
+
+      // Need to compute max possible elements in address space, since that
+      // is included in diag message.
+      llvm::APInt MaxElems = llvm::APInt::getMaxValue(AddrBits);
+      MaxElems = MaxElems.zext(std::max(AddrBits + 1, ElemBytes.getBitWidth()));
+      MaxElems += 1;
+      ElemBytes = ElemBytes.zextOrTrunc(MaxElems.getBitWidth());
+      MaxElems = MaxElems.udiv(ElemBytes);
+
+      unsigned DiagID =
+          ASE ? diag::warn_array_index_exceeds_max_addressable_bounds
+              : diag::warn_ptr_arith_exceeds_max_addressable_bounds;
+
+      // Diag message shows element size in bits and in "bytes" (platform-
+      // dependent CharUnits)
+      DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
+                          PDiag(DiagID)
+                              << index.toString(10, true) << AddrBits
+                              << (unsigned)ASTC.toBits(ElemCharUnits)
+                              << ElemBytes.toString(10, false)
+                              << MaxElems.toString(10, false)
+                              << (unsigned)MaxElems.getLimitedValue(~0U)
+                              << IndexExpr->getSourceRange());
+
+      if (!ND) {
+        // Try harder to find a NamedDecl to point at in the note.
+        while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
+          BaseExpr = ASE->getBase()->IgnoreParenCasts();
+        if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
+          ND = DRE->getDecl();
+        if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
+          ND = ME->getMemberDecl();
+      }
+
+      if (ND)
+        DiagRuntimeBehavior(ND->getBeginLoc(), BaseExpr,
+                            PDiag(diag::note_array_declared_here) << ND);
+    }
+    return;
+  }
+
   if (index.isUnsigned() || !index.isNegative()) {
     // It is possible that the type of the base expression after
     // IgnoreParenCasts is incomplete, even though the type of the base
@@ -14121,9 +14184,8 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
       }
     }
 
-    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
-    if (ASE)
-      DiagID = diag::warn_array_index_exceeds_bounds;
+    unsigned DiagID = ASE ? diag::warn_array_index_exceeds_bounds
+                          : diag::warn_ptr_arith_exceeds_bounds;
 
     DiagRuntimeBehavior(BaseExpr->getBeginLoc(), BaseExpr,
                         PDiag(DiagID) << index.toString(10, true)
@@ -14144,12 +14206,11 @@ void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
 
   if (!ND) {
     // Try harder to find a NamedDecl to point at in the note.
-    while (const ArraySubscriptExpr *ASE =
-           dyn_cast<ArraySubscriptExpr>(BaseExpr))
+    while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BaseExpr))
       BaseExpr = ASE->getBase()->IgnoreParenCasts();
-    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(BaseExpr))
       ND = DRE->getDecl();
-    if (const MemberExpr *ME = dyn_cast<MemberExpr>(BaseExpr))
+    if (const auto *ME = dyn_cast<MemberExpr>(BaseExpr))
       ND = ME->getMemberDecl();
   }
 

diff  --git a/clang/test/Sema/const-eval.c b/clang/test/Sema/const-eval.c
index bbcbb0e25237..c94539ab1de2 100644
--- a/clang/test/Sema/const-eval.c
+++ b/clang/test/Sema/const-eval.c
@@ -140,10 +140,10 @@ EVAL_EXPR(52, &pr24622 == (void *)&PR24622); // expected-error {{must have a con
 
 // We evaluate these by providing 2s' complement semantics in constant
 // expressions, like we do for integers.
-void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;
-void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;
-__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c;
-void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];
+void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a;                  // expected-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}}
+void *PR28739b = &PR28739b + (__int128)(unsigned long)-1;                  // expected-warning {{refers past the last possible element}}
+__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // expected-warning {{refers past the last possible element}}
+void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1];                // expected-warning {{refers past the last possible element}}
 
 struct PR35214_X {
   int k;

diff  --git a/clang/test/Sema/unbounded-array-bounds.c b/clang/test/Sema/unbounded-array-bounds.c
new file mode 100644
index 000000000000..18a8225b8469
--- /dev/null
+++ b/clang/test/Sema/unbounded-array-bounds.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-X86-ADDR64 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu   -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-I386-ADDR32 %s \
+// RUN:              --implicit-check-not 'past the last possible element'
+// RUN: %clang_cc1 -triple avr-pc-linux-gnu    -fsyntax-only %s 2>&1 | FileCheck --check-prefix=CHECK-AVR-ADDR16 %s  \
+// RUN:              --implicit-check-not 'past the last possible element'
+
+struct S {
+  long long a;
+  char b;
+  long long c;
+  short d;
+};
+
+struct S s[];
+
+void f1() {
+  ++s[3].a;
+  ++s[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: array index 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++s[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+long long ll[];
+
+void f2() {
+  ++ll[3];
+  ++ll[2705843009213693952];
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 2305843009213693952 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+  ++ll[847073650];
+  // CHECK-I386-ADDR32: :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 536870912 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 8192 elements)
+}
+
+void f3(struct S p[]) {
+  ++p[3].a;
+  ++p[7073650413200313099].b;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 64-bit {{.*}} (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:5: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  ++p[7073650].c;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+void f4(struct S *p) {
+  p += 3;
+  p += 7073650413200313099;
+  // CHECK-X86-ADDR64:  :[[@LINE-1]]:3: warning: the pointer incremented by 7073650413200313099 refers past the last possible element for an array in 64-bit address space containing 256-bit (32-byte) elements (max possible 576460752303423488 elements)
+  // CHECK-I386-ADDR32: :[[@LINE-2]]:3: warning: {{.*}} past the last possible element {{.*}} in 32-bit {{.*}} (max possible 178956970 elements)
+  // CHECK-AVR-ADDR16:  :[[@LINE-3]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+  p += 7073650;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:3: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 3276 elements)
+}
+
+struct BQ {
+  struct S bigblock[3276];
+};
+
+struct BQ bq[];
+
+void f5() {
+  ++bq[0].bigblock[0].a;
+  ++bq[1].bigblock[0].a;
+  // CHECK-AVR-ADDR16:  :[[@LINE-1]]:5: warning: {{.*}} past the last possible element {{.*}} in 16-bit {{.*}} (max possible 1 element)
+}

diff  --git a/clang/test/SemaCXX/constant-expression-cxx1y.cpp b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
index 8bc4f88a63a9..7fe71d485350 100644
--- a/clang/test/SemaCXX/constant-expression-cxx1y.cpp
+++ b/clang/test/SemaCXX/constant-expression-cxx1y.cpp
@@ -1018,8 +1018,9 @@ constexpr int S = sum(Cs); // expected-error{{must be initialized by a constant
 }
 
 constexpr void PR28739(int n) { // expected-error {{never produces a constant}}
-  int *p = &n;
+  int *p = &n;                  // expected-note {{declared here}}
   p += (__int128)(unsigned long)-1; // expected-note {{cannot refer to element 18446744073709551615 of non-array object in a constant expression}}
+  // expected-warning at -1 {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 32-bit (4-byte) elements (max possible 4611686018427387904 elements)}}
 }
 
 constexpr void Void(int n) {


        


More information about the cfe-commits mailing list