[cfe-commits] r136997 - in /cfe/trunk: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/Sema/SemaChecking.cpp lib/Sema/SemaExpr.cpp lib/Sema/SemaExprCXX.cpp test/SemaCXX/array-bounds.cpp

Kaelyn Uhrain rikka at google.com
Fri Aug 5 16:18:04 PDT 2011


Author: rikka
Date: Fri Aug  5 18:18:04 2011
New Revision: 136997

URL: http://llvm.org/viewvc/llvm-project?rev=136997&view=rev
Log:
Perform array bounds checking in more situations and properly handle special
case situations with the unary operators & and *. Also extend the array bounds
checking to work with pointer arithmetic; the pointer arithemtic checking can
be turned on using -Warray-bounds-pointer-arithmetic.

The changes to where CheckArrayAccess gets called is based on some trial &
error and a bunch of digging through source code and gdb backtraces in order
to have the check performed under as many situations as possible (such as for
variable initializers, arguments to function calls, and within conditional in
addition to the simpler cases of the operands to binary and unary operator)
while not being called--and triggering warnings--more than once for a given
ArraySubscriptExpr.

Modified:
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/include/clang/Sema/Sema.h
    cfe/trunk/lib/Sema/SemaChecking.cpp
    cfe/trunk/lib/Sema/SemaExpr.cpp
    cfe/trunk/lib/Sema/SemaExprCXX.cpp
    cfe/trunk/test/SemaCXX/array-bounds.cpp

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Aug  5 18:18:04 2011
@@ -4085,13 +4085,19 @@
 def err_defaulted_move_unsupported : Error<
   "defaulting move functions not yet supported">;
 
+def warn_ptr_arith_precedes_bounds : Warning<
+  "the pointer decremented by %0 refers before the beginning of the array">,
+  InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;
+def warn_ptr_arith_exceeds_bounds : Warning<
+  "the pointer incremented by %0 refers past the end of the array (that "
+  "contains %1 element%s2)">,
+  InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;
 def warn_array_index_precedes_bounds : Warning<
   "array index of '%0' indexes before the beginning of the array">,
   InGroup<DiagGroup<"array-bounds">>;
 def warn_array_index_exceeds_bounds : Warning<
   "array index of '%0' indexes past the end of an array (that contains %1 "
-  "%select{element|elements}2)">,
-  InGroup<DiagGroup<"array-bounds">>;
+  "element%s2)">, InGroup<DiagGroup<"array-bounds">>;
 def note_array_index_out_of_bounds : Note<
   "array %0 declared here">;
 

Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Fri Aug  5 18:18:04 2011
@@ -5948,6 +5948,8 @@
                                                 unsigned ByteNo) const;
 
 private:  
+  void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
+                        bool isSubscript=false, bool AllowOnePastEnd=true);
   void CheckArrayAccess(const Expr *E);
   bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall);
   bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall);

Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Aug  5 18:18:04 2011
@@ -3370,6 +3370,11 @@
   if (E->isTypeDependent() || E->isValueDependent())
     return;
 
+  // Check for array bounds violations in cases where the check isn't triggered
+  // elsewhere for other Expr types (like BinaryOperators), e.g. when an
+  // ArraySubscriptExpr is on the RHS of a variable initialization.
+  CheckArrayAccess(E);
+
   // This is not the right CC for (e.g.) a variable initialization.
   AnalyzeImplicitConversions(*this, E, CC);
 }
@@ -3474,6 +3479,15 @@
     << TRange << Op->getSourceRange();
 }
 
+static const Type* getElementType(const Expr *BaseExpr) {
+  const Type* EltType = BaseExpr->getType().getTypePtr();
+  if (EltType->isAnyPointerType())
+    return EltType->getPointeeType().getTypePtr();
+  else if (EltType->isArrayType())
+    return EltType->getBaseElementTypeUnsafe();
+  return EltType;
+}
+
 /// \brief Check whether this array fits the idiom of a size-one tail padded
 /// array member of a struct.
 ///
@@ -3510,19 +3524,21 @@
   return false;
 }
 
-static void CheckArrayAccess_Check(Sema &S,
-                                   const clang::ArraySubscriptExpr *E) {
-  const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts();
+void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
+                            bool isSubscript, bool AllowOnePastEnd) {
+  const Type* EffectiveType = getElementType(BaseExpr);
+  BaseExpr = BaseExpr->IgnoreParenCasts();
+  IndexExpr = IndexExpr->IgnoreParenCasts();
+
   const ConstantArrayType *ArrayTy =
-    S.Context.getAsConstantArrayType(BaseExpr->getType());
+    Context.getAsConstantArrayType(BaseExpr->getType());
   if (!ArrayTy)
     return;
 
-  const Expr *IndexExpr = E->getIdx();
   if (IndexExpr->isValueDependent())
     return;
   llvm::APSInt index;
-  if (!IndexExpr->isIntegerConstantExpr(index, S.Context))
+  if (!IndexExpr->isIntegerConstantExpr(index, Context))
     return;
 
   const NamedDecl *ND = NULL;
@@ -3535,47 +3551,94 @@
     llvm::APInt size = ArrayTy->getSize();
     if (!size.isStrictlyPositive())
       return;
+
+    const Type* BaseType = getElementType(BaseExpr);
+    if (!isSubscript && BaseType != EffectiveType) {
+      // Make sure we're comparing apples to apples when comparing index to size
+      uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
+      uint64_t array_typesize = Context.getTypeSize(BaseType);
+      if (ptrarith_typesize != array_typesize) {
+        // There's a cast to a different size type involved
+        uint64_t ratio = array_typesize / ptrarith_typesize;
+        // TODO: Be smarter about handling cases where array_typesize is not a
+        // multiple of ptrarith_typesize
+        if (ptrarith_typesize * ratio == array_typesize)
+          size *= llvm::APInt(size.getBitWidth(), ratio);
+      }
+    }
+
     if (size.getBitWidth() > index.getBitWidth())
       index = index.sext(size.getBitWidth());
     else if (size.getBitWidth() < index.getBitWidth())
       size = size.sext(index.getBitWidth());
 
-    // Don't warn for valid indexes
-    if (index.slt(size))
+    // For array subscripting the index must be less than size, but for pointer
+    // arithmetic also allow the index (offset) to be equal to size since
+    // computing the next address after the end of the array is legal and
+    // commonly done e.g. in C++ iterators and range-based for loops.
+    if (AllowOnePastEnd ? index.sle(size) : index.slt(size))
       return;
 
     // Also don't warn for arrays of size 1 which are members of some
     // structure. These are often used to approximate flexible arrays in C89
     // code.
-    if (IsTailPaddedMemberArray(S, size, ND))
+    if (IsTailPaddedMemberArray(*this, size, ND))
       return;
 
-    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
-                          S.PDiag(diag::warn_array_index_exceeds_bounds)
-                            << index.toString(10, true)
-                            << size.toString(10, true)
-                            << (unsigned)size.ugt(1)
-                            << IndexExpr->getSourceRange());
+    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
+    if (isSubscript)
+      DiagID = diag::warn_array_index_exceeds_bounds;
+
+    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
+                        PDiag(DiagID) << index.toString(10, true)
+                          << size.toString(10, true)
+                          << (unsigned)size.getLimitedValue(~0U)
+                          << IndexExpr->getSourceRange());
   } else {
-    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
-                          S.PDiag(diag::warn_array_index_precedes_bounds)
-                            << index.toString(10, true)
-                            << IndexExpr->getSourceRange());
+    unsigned DiagID = diag::warn_array_index_precedes_bounds;
+    if (!isSubscript) {
+      DiagID = diag::warn_ptr_arith_precedes_bounds;
+      if (index.isNegative()) index = -index;
+    }
+
+    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
+                        PDiag(DiagID) << index.toString(10, true)
+                          << IndexExpr->getSourceRange());
   }
 
   if (ND)
-    S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
-                          S.PDiag(diag::note_array_index_out_of_bounds)
-                            << ND->getDeclName());
+    DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
+                        PDiag(diag::note_array_index_out_of_bounds)
+                          << ND->getDeclName());
 }
 
 void Sema::CheckArrayAccess(const Expr *expr) {
-  while (true) {
-    expr = expr->IgnoreParens();
+  int AllowOnePastEnd = 0;
+  while (expr) {
+    expr = expr->IgnoreParenImpCasts();
     switch (expr->getStmtClass()) {
-      case Stmt::ArraySubscriptExprClass:
-        CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr));
+      case Stmt::ArraySubscriptExprClass: {
+        const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(expr);
+        CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true,
+                         AllowOnePastEnd > 0);
         return;
+      }
+      case Stmt::UnaryOperatorClass: {
+        // Only unwrap the * and & unary operators
+        const UnaryOperator *UO = cast<UnaryOperator>(expr);
+        expr = UO->getSubExpr();
+        switch (UO->getOpcode()) {
+          case UO_AddrOf:
+            AllowOnePastEnd++;
+            break;
+          case UO_Deref:
+            AllowOnePastEnd--;
+            break;
+          default:
+            return;
+        }
+        break;
+      }
       case Stmt::ConditionalOperatorClass: {
         const ConditionalOperator *cond = cast<ConditionalOperator>(expr);
         if (const Expr *lhs = cond->getLHS())

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Fri Aug  5 18:18:04 2011
@@ -322,6 +322,7 @@
   //   A glvalue of a non-function, non-array type T can be
   //   converted to a prvalue.
   if (!E->isGLValue()) return Owned(E);
+
   QualType T = E->getType();
   assert(!T.isNull() && "r-value conversion on typeless expression?");
 
@@ -364,8 +365,6 @@
   //   type of the lvalue.    
   if (T.hasQualifiers())
     T = T.getUnqualifiedType();
-
-  CheckArrayAccess(E);
   
   return Owned(ImplicitCastExpr::Create(Context, T, CK_LValueToRValue,
                                         E, 0, VK_RValue));
@@ -3397,6 +3396,12 @@
 
       Arg = ArgExpr.takeAs<Expr>();
     }
+
+    // Check for array bounds violations for each argument to the call. This
+    // check only triggers warnings when the argument isn't a more complex Expr
+    // with its own checking, such as a BinaryOperator.
+    CheckArrayAccess(Arg);
+
     AllArgs.push_back(Arg);
   }
 
@@ -5935,6 +5940,9 @@
         return QualType();
       }
 
+      // Check array bounds for pointer arithemtic
+      CheckArrayAccess(PExp, IExp);
+
       if (CompLHSTy) {
         QualType LHSTy = Context.isPromotableBitField(lex.get());
         if (LHSTy.isNull()) {
@@ -5991,6 +5999,12 @@
       if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get()))
         return QualType();
 
+      Expr *IExpr = rex.get()->IgnoreParenCasts();
+      UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue,
+                           OK_Ordinary, IExpr->getExprLoc());
+      // Check array bounds for pointer arithemtic
+      CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex);
+
       if (CompLHSTy) *CompLHSTy = lex.get()->getType();
       return lex.get()->getType();
     }
@@ -6984,9 +6998,7 @@
     return QualType();
 
   CheckForNullPointerDereference(*this, LHS);
-  // Check for trivial buffer overflows.
-  CheckArrayAccess(LHS->IgnoreParenCasts());
-  
+
   // C99 6.5.16p3: The type of an assignment expression is the type of the
   // left operand unless the left operand has qualified type, in which case
   // it is the unqualified version of the type of the left operand.
@@ -7721,6 +7733,11 @@
   }
   if (ResultTy.isNull() || lhs.isInvalid() || rhs.isInvalid())
     return ExprError();
+
+  // Check for array bounds violations for both sides of the BinaryOperator
+  CheckArrayAccess(lhs.get());
+  CheckArrayAccess(rhs.get());
+
   if (CompResultTy.isNull())
     return Owned(new (Context) BinaryOperator(lhs.take(), rhs.take(), Opc,
                                               ResultTy, VK, OK, OpLoc));
@@ -8071,6 +8088,13 @@
   if (resultType.isNull() || Input.isInvalid())
     return ExprError();
 
+  // Check for array bounds violations in the operand of the UnaryOperator,
+  // except for the '*' and '&' operators that have to be handled specially
+  // by CheckArrayAccess (as there are special cases like &array[arraysize]
+  // that are explicitly defined as valid by the standard).
+  if (Opc != UO_AddrOf && Opc != UO_Deref)
+    CheckArrayAccess(Input.get());
+
   return Owned(new (Context) UnaryOperator(Input.take(), Opc, resultType,
                                            VK, OK, OpLoc));
 }

Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Aug  5 18:18:04 2011
@@ -2264,9 +2264,6 @@
       if (!From->isGLValue()) break;
     }
 
-    // Check for trivial buffer overflows.
-    CheckArrayAccess(From);
-
     FromType = FromType.getUnqualifiedType();
     From = ImplicitCastExpr::Create(Context, FromType, CK_LValueToRValue,
                                     From, 0, VK_RValue);

Modified: cfe/trunk/test/SemaCXX/array-bounds.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/array-bounds.cpp?rev=136997&r1=136996&r2=136997&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/array-bounds.cpp (original)
+++ cfe/trunk/test/SemaCXX/array-bounds.cpp Fri Aug  5 18:18:04 2011
@@ -37,11 +37,16 @@
   s2.a[3] = 0; // no warning for 0-sized array
 
   union {
-    short a[2]; // expected-note {{declared here}}
+    short a[2]; // expected-note 4 {{declared here}}
     char c[4];
   } u;
   u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}
   u.c[3] = 1; // no warning
+  short *p = &u.a[2]; // no warning
+  p = &u.a[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}
+  *(&u.a[2]) = 1; // expected-warning {{array index of '2' indexes past the end of an array (that contains 2 elements)}}
+  *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}
+  *(&u.c[3]) = 1; // no warning
 
   const int const_subscript = 3;
   int array[2]; // expected-note {{declared here}}
@@ -153,8 +158,7 @@
 enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E };
 enum enumB { enumB_X, enumB_Y, enumB_Z };
 static enum enumB myVal = enumB_X;
-void test_nested_switch()
-{
+void test_nested_switch() {
   switch (enumA_E) { // expected-warning {{no case matching constant}}
     switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and 'enumB_Z' not handled in switch}}
       case enumB_Y: ;
@@ -199,3 +203,14 @@
            B->c[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}}
   }
 }
+
+void bar(int x) {}
+int test_more() {
+  int foo[5]; // expected-note 5 {{array 'foo' declared here}}
+  bar(foo[5]); // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}
+  ++foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}
+  if (foo[6]) // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}}
+    return --foo[6]; // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}}
+  else
+    return foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}
+}





More information about the cfe-commits mailing list