[cfe-commits] r136997 - in /cfe/trunk: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/Sema/SemaChecking.cpp lib/Sema/SemaExpr.cpp lib/Sema/SemaExprCXX.cpp test/SemaCXX/array-bounds.cpp

Chandler Carruth chandlerc at google.com
Fri Aug 5 16:44:09 PDT 2011


Just a few comments inline..

On Fri, Aug 5, 2011 at 4:18 PM, Kaelyn Uhrain <rikka at google.com> wrote:

> Author: rikka
> Date: Fri Aug  5 18:18:04 2011
> New Revision: 136997
>
> URL: http://llvm.org/viewvc/llvm-project?rev=136997&view=rev
> Log:
> Perform array bounds checking in more situations and properly handle
> special
> case situations with the unary operators & and *. Also extend the array
> bounds
> checking to work with pointer arithmetic; the pointer arithemtic checking
> can
> be turned on using -Warray-bounds-pointer-arithmetic.
>
> The changes to where CheckArrayAccess gets called is based on some trial &
> error and a bunch of digging through source code and gdb backtraces in
> order
> to have the check performed under as many situations as possible (such as
> for
> variable initializers, arguments to function calls, and within conditional
> in
> addition to the simpler cases of the operands to binary and unary operator)
> while not being called--and triggering warnings--more than once for a given
> ArraySubscriptExpr.
>
> Modified:
>    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
>    cfe/trunk/include/clang/Sema/Sema.h
>    cfe/trunk/lib/Sema/SemaChecking.cpp
>    cfe/trunk/lib/Sema/SemaExpr.cpp
>    cfe/trunk/lib/Sema/SemaExprCXX.cpp
>    cfe/trunk/test/SemaCXX/array-bounds.cpp
>
> Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
> +++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Aug  5
> 18:18:04 2011
> @@ -4085,13 +4085,19 @@
>  def err_defaulted_move_unsupported : Error<
>   "defaulting move functions not yet supported">;
>
> +def warn_ptr_arith_precedes_bounds : Warning<
> +  "the pointer decremented by %0 refers before the beginning of the
> array">,
> +  InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;
> +def warn_ptr_arith_exceeds_bounds : Warning<
> +  "the pointer incremented by %0 refers past the end of the array (that "
> +  "contains %1 element%s2)">,
> +  InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;
>  def warn_array_index_precedes_bounds : Warning<
>   "array index of '%0' indexes before the beginning of the array">,
>   InGroup<DiagGroup<"array-bounds">>;
>  def warn_array_index_exceeds_bounds : Warning<
>   "array index of '%0' indexes past the end of an array (that contains %1 "
> -  "%select{element|elements}2)">,
> -  InGroup<DiagGroup<"array-bounds">>;
> +  "element%s2)">, InGroup<DiagGroup<"array-bounds">>;
>  def note_array_index_out_of_bounds : Note<
>   "array %0 declared here">;
>
>
> Modified: cfe/trunk/include/clang/Sema/Sema.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/include/clang/Sema/Sema.h (original)
> +++ cfe/trunk/include/clang/Sema/Sema.h Fri Aug  5 18:18:04 2011
> @@ -5948,6 +5948,8 @@
>                                                 unsigned ByteNo) const;
>
>  private:
> +  void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
> +                        bool isSubscript=false, bool
> AllowOnePastEnd=true);
>   void CheckArrayAccess(const Expr *E);
>   bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall);
>   bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall);
>
> Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Aug  5 18:18:04 2011
> @@ -3370,6 +3370,11 @@
>   if (E->isTypeDependent() || E->isValueDependent())
>     return;
>
> +  // Check for array bounds violations in cases where the check isn't
> triggered
> +  // elsewhere for other Expr types (like BinaryOperators), e.g. when an
> +  // ArraySubscriptExpr is on the RHS of a variable initialization.
> +  CheckArrayAccess(E);
> +
>   // This is not the right CC for (e.g.) a variable initialization.
>   AnalyzeImplicitConversions(*this, E, CC);
>  }
> @@ -3474,6 +3479,15 @@
>     << TRange << Op->getSourceRange();
>  }
>
> +static const Type* getElementType(const Expr *BaseExpr) {
> +  const Type* EltType = BaseExpr->getType().getTypePtr();
> +  if (EltType->isAnyPointerType())
> +    return EltType->getPointeeType().getTypePtr();
> +  else if (EltType->isArrayType())
> +    return EltType->getBaseElementTypeUnsafe();
> +  return EltType;
> +}
>

I wonder if this would make sense to add to the AST itself...


> +
>  /// \brief Check whether this array fits the idiom of a size-one tail
> padded
>  /// array member of a struct.
>  ///
> @@ -3510,19 +3524,21 @@
>   return false;
>  }
>
> -static void CheckArrayAccess_Check(Sema &S,
> -                                   const clang::ArraySubscriptExpr *E) {
> -  const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts();
> +void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,
> +                            bool isSubscript, bool AllowOnePastEnd) {
> +  const Type* EffectiveType = getElementType(BaseExpr);
> +  BaseExpr = BaseExpr->IgnoreParenCasts();
> +  IndexExpr = IndexExpr->IgnoreParenCasts();
> +
>   const ConstantArrayType *ArrayTy =
> -    S.Context.getAsConstantArrayType(BaseExpr->getType());
> +    Context.getAsConstantArrayType(BaseExpr->getType());
>   if (!ArrayTy)
>     return;
>
> -  const Expr *IndexExpr = E->getIdx();
>   if (IndexExpr->isValueDependent())
>     return;
>   llvm::APSInt index;
> -  if (!IndexExpr->isIntegerConstantExpr(index, S.Context))
> +  if (!IndexExpr->isIntegerConstantExpr(index, Context))
>     return;
>
>   const NamedDecl *ND = NULL;
> @@ -3535,47 +3551,94 @@
>     llvm::APInt size = ArrayTy->getSize();
>     if (!size.isStrictlyPositive())
>       return;
> +
> +    const Type* BaseType = getElementType(BaseExpr);
> +    if (!isSubscript && BaseType != EffectiveType) {
> +      // Make sure we're comparing apples to apples when comparing index
> to size
> +      uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);
> +      uint64_t array_typesize = Context.getTypeSize(BaseType);
>

It would be good to use more LLVM-style local variable names such as
'PtrArithTypeSize'


> +      if (ptrarith_typesize != array_typesize) {
> +        // There's a cast to a different size type involved
> +        uint64_t ratio = array_typesize / ptrarith_typesize;
> +        // TODO: Be smarter about handling cases where array_typesize is
> not a
> +        // multiple of ptrarith_typesize
> +        if (ptrarith_typesize * ratio == array_typesize)
> +          size *= llvm::APInt(size.getBitWidth(), ratio);
> +      }
> +    }
> +
>     if (size.getBitWidth() > index.getBitWidth())
>       index = index.sext(size.getBitWidth());
>     else if (size.getBitWidth() < index.getBitWidth())
>       size = size.sext(index.getBitWidth());
>
> -    // Don't warn for valid indexes
> -    if (index.slt(size))
> +    // For array subscripting the index must be less than size, but for
> pointer
> +    // arithmetic also allow the index (offset) to be equal to size since
> +    // computing the next address after the end of the array is legal and
> +    // commonly done e.g. in C++ iterators and range-based for loops.
>

This doesn't seem to match the logic below, which appears (but perhaps I'm
just misreading it) to allow one-past-the-end for code like "&foo[N]"... But
maybe I've just misread the code below.


> +    if (AllowOnePastEnd ? index.sle(size) : index.slt(size))
>       return;
>
>     // Also don't warn for arrays of size 1 which are members of some
>     // structure. These are often used to approximate flexible arrays in
> C89
>     // code.
> -    if (IsTailPaddedMemberArray(S, size, ND))
> +    if (IsTailPaddedMemberArray(*this, size, ND))
>       return;
>
> -    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
> -                          S.PDiag(diag::warn_array_index_exceeds_bounds)
> -                            << index.toString(10, true)
> -                            << size.toString(10, true)
> -                            << (unsigned)size.ugt(1)
> -                            << IndexExpr->getSourceRange());
> +    unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;
> +    if (isSubscript)
> +      DiagID = diag::warn_array_index_exceeds_bounds;
> +
> +    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
> +                        PDiag(DiagID) << index.toString(10, true)
> +                          << size.toString(10, true)
> +                          << (unsigned)size.getLimitedValue(~0U)
> +                          << IndexExpr->getSourceRange());
>   } else {
> -    S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,
> -                          S.PDiag(diag::warn_array_index_precedes_bounds)
> -                            << index.toString(10, true)
> -                            << IndexExpr->getSourceRange());
> +    unsigned DiagID = diag::warn_array_index_precedes_bounds;
> +    if (!isSubscript) {
> +      DiagID = diag::warn_ptr_arith_precedes_bounds;
> +      if (index.isNegative()) index = -index;
> +    }
> +
> +    DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,
> +                        PDiag(DiagID) << index.toString(10, true)
> +                          << IndexExpr->getSourceRange());
>   }
>
>   if (ND)
> -    S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
> -                          S.PDiag(diag::note_array_index_out_of_bounds)
> -                            << ND->getDeclName());
> +    DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,
> +                        PDiag(diag::note_array_index_out_of_bounds)
> +                          << ND->getDeclName());
>  }
>
>  void Sema::CheckArrayAccess(const Expr *expr) {
> -  while (true) {
> -    expr = expr->IgnoreParens();
> +  int AllowOnePastEnd = 0;
> +  while (expr) {
> +    expr = expr->IgnoreParenImpCasts();
>     switch (expr->getStmtClass()) {
> -      case Stmt::ArraySubscriptExprClass:
> -        CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr));
> +      case Stmt::ArraySubscriptExprClass: {
> +        const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(expr);
> +        CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true,
> +                         AllowOnePastEnd > 0);
>         return;
> +      }
> +      case Stmt::UnaryOperatorClass: {
> +        // Only unwrap the * and & unary operators
> +        const UnaryOperator *UO = cast<UnaryOperator>(expr);
> +        expr = UO->getSubExpr();
> +        switch (UO->getOpcode()) {
> +          case UO_AddrOf:
> +            AllowOnePastEnd++;
> +            break;
> +          case UO_Deref:
> +            AllowOnePastEnd--;
> +            break;
> +          default:
> +            return;
> +        }
> +        break;
> +      }
>       case Stmt::ConditionalOperatorClass: {
>         const ConditionalOperator *cond = cast<ConditionalOperator>(expr);
>         if (const Expr *lhs = cond->getLHS())
>
> Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaExpr.cpp Fri Aug  5 18:18:04 2011
> @@ -322,6 +322,7 @@
>   //   A glvalue of a non-function, non-array type T can be
>   //   converted to a prvalue.
>   if (!E->isGLValue()) return Owned(E);
> +
>   QualType T = E->getType();
>   assert(!T.isNull() && "r-value conversion on typeless expression?");
>
> @@ -364,8 +365,6 @@
>   //   type of the lvalue.
>   if (T.hasQualifiers())
>     T = T.getUnqualifiedType();
> -
> -  CheckArrayAccess(E);
>
>   return Owned(ImplicitCastExpr::Create(Context, T, CK_LValueToRValue,
>                                         E, 0, VK_RValue));
> @@ -3397,6 +3396,12 @@
>
>       Arg = ArgExpr.takeAs<Expr>();
>     }
> +
> +    // Check for array bounds violations for each argument to the call.
> This
> +    // check only triggers warnings when the argument isn't a more complex
> Expr
> +    // with its own checking, such as a BinaryOperator.
> +    CheckArrayAccess(Arg);
> +
>     AllArgs.push_back(Arg);
>   }
>
> @@ -5935,6 +5940,9 @@
>         return QualType();
>       }
>
> +      // Check array bounds for pointer arithemtic
> +      CheckArrayAccess(PExp, IExp);
> +
>       if (CompLHSTy) {
>         QualType LHSTy = Context.isPromotableBitField(lex.get());
>         if (LHSTy.isNull()) {
> @@ -5991,6 +5999,12 @@
>       if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get()))
>         return QualType();
>
> +      Expr *IExpr = rex.get()->IgnoreParenCasts();
> +      UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue,
>

Variables should be CamelCased. Also, maybe 'NegatedRHS' would be more
clear? A bit unfortunate we're already using lex/rex here.


> +                           OK_Ordinary, IExpr->getExprLoc());
> +      // Check array bounds for pointer arithemtic
> +      CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex);
> +
>       if (CompLHSTy) *CompLHSTy = lex.get()->getType();
>       return lex.get()->getType();
>     }
> @@ -6984,9 +6998,7 @@
>     return QualType();
>
>   CheckForNullPointerDereference(*this, LHS);
> -  // Check for trivial buffer overflows.
> -  CheckArrayAccess(LHS->IgnoreParenCasts());
> -
> +
>   // C99 6.5.16p3: The type of an assignment expression is the type of the
>   // left operand unless the left operand has qualified type, in which case
>   // it is the unqualified version of the type of the left operand.
> @@ -7721,6 +7733,11 @@
>   }
>   if (ResultTy.isNull() || lhs.isInvalid() || rhs.isInvalid())
>     return ExprError();
> +
> +  // Check for array bounds violations for both sides of the
> BinaryOperator
> +  CheckArrayAccess(lhs.get());
> +  CheckArrayAccess(rhs.get());
> +
>   if (CompResultTy.isNull())
>     return Owned(new (Context) BinaryOperator(lhs.take(), rhs.take(), Opc,
>                                               ResultTy, VK, OK, OpLoc));
> @@ -8071,6 +8088,13 @@
>   if (resultType.isNull() || Input.isInvalid())
>     return ExprError();
>
> +  // Check for array bounds violations in the operand of the
> UnaryOperator,
> +  // except for the '*' and '&' operators that have to be handled
> specially
> +  // by CheckArrayAccess (as there are special cases like
> &array[arraysize]
> +  // that are explicitly defined as valid by the standard).
> +  if (Opc != UO_AddrOf && Opc != UO_Deref)
> +    CheckArrayAccess(Input.get());
> +
>   return Owned(new (Context) UnaryOperator(Input.take(), Opc, resultType,
>                                            VK, OK, OpLoc));
>  }
>
> Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Aug  5 18:18:04 2011
> @@ -2264,9 +2264,6 @@
>       if (!From->isGLValue()) break;
>     }
>
> -    // Check for trivial buffer overflows.
> -    CheckArrayAccess(From);
> -
>     FromType = FromType.getUnqualifiedType();
>     From = ImplicitCastExpr::Create(Context, FromType, CK_LValueToRValue,
>                                     From, 0, VK_RValue);
>
> Modified: cfe/trunk/test/SemaCXX/array-bounds.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/array-bounds.cpp?rev=136997&r1=136996&r2=136997&view=diff
>
> ==============================================================================
> --- cfe/trunk/test/SemaCXX/array-bounds.cpp (original)
> +++ cfe/trunk/test/SemaCXX/array-bounds.cpp Fri Aug  5 18:18:04 2011
> @@ -37,11 +37,16 @@
>   s2.a[3] = 0; // no warning for 0-sized array
>
>   union {
> -    short a[2]; // expected-note {{declared here}}
> +    short a[2]; // expected-note 4 {{declared here}}
>     char c[4];
>   } u;
>   u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end
> of an array (that contains 2 elements)}}
>   u.c[3] = 1; // no warning
> +  short *p = &u.a[2]; // no warning
> +  p = &u.a[3]; // expected-warning {{array index of '3' indexes past the
> end of an array (that contains 2 elements)}}
> +  *(&u.a[2]) = 1; // expected-warning {{array index of '2' indexes past
> the end of an array (that contains 2 elements)}}
> +  *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past
> the end of an array (that contains 2 elements)}}
> +  *(&u.c[3]) = 1; // no warning
>
>   const int const_subscript = 3;
>   int array[2]; // expected-note {{declared here}}
> @@ -153,8 +158,7 @@
>  enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E };
>  enum enumB { enumB_X, enumB_Y, enumB_Z };
>  static enum enumB myVal = enumB_X;
> -void test_nested_switch()
> -{
> +void test_nested_switch() {
>   switch (enumA_E) { // expected-warning {{no case matching constant}}
>     switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and
> 'enumB_Z' not handled in switch}}
>       case enumB_Y: ;
> @@ -199,3 +203,14 @@
>            B->c[3]; // expected-warning {{array index of '3' indexes past
> the end of an array (that contains 1 element)}}
>   }
>  }
> +
> +void bar(int x) {}
> +int test_more() {
> +  int foo[5]; // expected-note 5 {{array 'foo' declared here}}
> +  bar(foo[5]); // expected-warning {{array index of '5' indexes past the
> end of an array (that contains 5 elements)}}
> +  ++foo[5]; // expected-warning {{array index of '5' indexes past the end
> of an array (that contains 5 elements)}}
> +  if (foo[6]) // expected-warning {{array index of '6' indexes past the
> end of an array (that contains 5 elements)}}
> +    return --foo[6]; // expected-warning {{array index of '6' indexes past
> the end of an array (that contains 5 elements)}}
> +  else
> +    return foo[5]; // expected-warning {{array index of '5' indexes past
> the end of an array (that contains 5 elements)}}
> +}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20110805/4c4e9045/attachment.html>


More information about the cfe-commits mailing list