Just a few comments inline..<br><br><div class="gmail_quote">On Fri, Aug 5, 2011 at 4:18 PM, Kaelyn Uhrain <span dir="ltr"><<a href="mailto:rikka@google.com">rikka@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
Author: rikka<br>
Date: Fri Aug 5 18:18:04 2011<br>
New Revision: 136997<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=136997&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=136997&view=rev</a><br>
Log:<br>
Perform array bounds checking in more situations and properly handle special<br>
case situations with the unary operators & and *. Also extend the array bounds<br>
checking to work with pointer arithmetic; the pointer arithemtic checking can<br>
be turned on using -Warray-bounds-pointer-arithmetic.<br>
<br>
The changes to where CheckArrayAccess gets called is based on some trial &<br>
error and a bunch of digging through source code and gdb backtraces in order<br>
to have the check performed under as many situations as possible (such as for<br>
variable initializers, arguments to function calls, and within conditional in<br>
addition to the simpler cases of the operands to binary and unary operator)<br>
while not being called--and triggering warnings--more than once for a given<br>
ArraySubscriptExpr.<br>
<br>
Modified:<br>
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td<br>
cfe/trunk/include/clang/Sema/Sema.h<br>
cfe/trunk/lib/Sema/SemaChecking.cpp<br>
cfe/trunk/lib/Sema/SemaExpr.cpp<br>
cfe/trunk/lib/Sema/SemaExprCXX.cpp<br>
cfe/trunk/test/SemaCXX/array-bounds.cpp<br>
<br>
Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)<br>
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Aug 5 18:18:04 2011<br>
@@ -4085,13 +4085,19 @@<br>
def err_defaulted_move_unsupported : Error<<br>
"defaulting move functions not yet supported">;<br>
<br>
+def warn_ptr_arith_precedes_bounds : Warning<<br>
+ "the pointer decremented by %0 refers before the beginning of the array">,<br>
+ InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;<br>
+def warn_ptr_arith_exceeds_bounds : Warning<<br>
+ "the pointer incremented by %0 refers past the end of the array (that "<br>
+ "contains %1 element%s2)">,<br>
+ InGroup<DiagGroup<"array-bounds-pointer-arithmetic">>, DefaultIgnore;<br>
def warn_array_index_precedes_bounds : Warning<<br>
"array index of '%0' indexes before the beginning of the array">,<br>
InGroup<DiagGroup<"array-bounds">>;<br>
def warn_array_index_exceeds_bounds : Warning<<br>
"array index of '%0' indexes past the end of an array (that contains %1 "<br>
- "%select{element|elements}2)">,<br>
- InGroup<DiagGroup<"array-bounds">>;<br>
+ "element%s2)">, InGroup<DiagGroup<"array-bounds">>;<br>
def note_array_index_out_of_bounds : Note<<br>
"array %0 declared here">;<br>
<br>
<br>
Modified: cfe/trunk/include/clang/Sema/Sema.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/include/clang/Sema/Sema.h (original)<br>
+++ cfe/trunk/include/clang/Sema/Sema.h Fri Aug 5 18:18:04 2011<br>
@@ -5948,6 +5948,8 @@<br>
unsigned ByteNo) const;<br>
<br>
private:<br>
+ void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,<br>
+ bool isSubscript=false, bool AllowOnePastEnd=true);<br>
void CheckArrayAccess(const Expr *E);<br>
bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall);<br>
bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall);<br>
<br>
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)<br>
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Aug 5 18:18:04 2011<br>
@@ -3370,6 +3370,11 @@<br>
if (E->isTypeDependent() || E->isValueDependent())<br>
return;<br>
<br>
+ // Check for array bounds violations in cases where the check isn't triggered<br>
+ // elsewhere for other Expr types (like BinaryOperators), e.g. when an<br>
+ // ArraySubscriptExpr is on the RHS of a variable initialization.<br>
+ CheckArrayAccess(E);<br>
+<br>
// This is not the right CC for (e.g.) a variable initialization.<br>
AnalyzeImplicitConversions(*this, E, CC);<br>
}<br>
@@ -3474,6 +3479,15 @@<br>
<< TRange << Op->getSourceRange();<br>
}<br>
<br>
+static const Type* getElementType(const Expr *BaseExpr) {<br>
+ const Type* EltType = BaseExpr->getType().getTypePtr();<br>
+ if (EltType->isAnyPointerType())<br>
+ return EltType->getPointeeType().getTypePtr();<br>
+ else if (EltType->isArrayType())<br>
+ return EltType->getBaseElementTypeUnsafe();<br>
+ return EltType;<br>
+}<br></blockquote><div><br></div><div>I wonder if this would make sense to add to the AST itself...</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
+<br>
/// \brief Check whether this array fits the idiom of a size-one tail padded<br>
/// array member of a struct.<br>
///<br>
@@ -3510,19 +3524,21 @@<br>
return false;<br>
}<br>
<br>
-static void CheckArrayAccess_Check(Sema &S,<br>
- const clang::ArraySubscriptExpr *E) {<br>
- const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts();<br>
+void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr,<br>
+ bool isSubscript, bool AllowOnePastEnd) {<br>
+ const Type* EffectiveType = getElementType(BaseExpr);<br>
+ BaseExpr = BaseExpr->IgnoreParenCasts();<br>
+ IndexExpr = IndexExpr->IgnoreParenCasts();<br>
+<br>
const ConstantArrayType *ArrayTy =<br>
- S.Context.getAsConstantArrayType(BaseExpr->getType());<br>
+ Context.getAsConstantArrayType(BaseExpr->getType());<br>
if (!ArrayTy)<br>
return;<br>
<br>
- const Expr *IndexExpr = E->getIdx();<br>
if (IndexExpr->isValueDependent())<br>
return;<br>
llvm::APSInt index;<br>
- if (!IndexExpr->isIntegerConstantExpr(index, S.Context))<br>
+ if (!IndexExpr->isIntegerConstantExpr(index, Context))<br>
return;<br>
<br>
const NamedDecl *ND = NULL;<br>
@@ -3535,47 +3551,94 @@<br>
llvm::APInt size = ArrayTy->getSize();<br>
if (!size.isStrictlyPositive())<br>
return;<br>
+<br>
+ const Type* BaseType = getElementType(BaseExpr);<br>
+ if (!isSubscript && BaseType != EffectiveType) {<br>
+ // Make sure we're comparing apples to apples when comparing index to size<br>
+ uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType);<br>
+ uint64_t array_typesize = Context.getTypeSize(BaseType);<br></blockquote><div><br></div><div>It would be good to use more LLVM-style local variable names such as 'PtrArithTypeSize'</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
+ if (ptrarith_typesize != array_typesize) {<br>
+ // There's a cast to a different size type involved<br>
+ uint64_t ratio = array_typesize / ptrarith_typesize;<br>
+ // TODO: Be smarter about handling cases where array_typesize is not a<br>
+ // multiple of ptrarith_typesize<br>
+ if (ptrarith_typesize * ratio == array_typesize)<br>
+ size *= llvm::APInt(size.getBitWidth(), ratio);<br>
+ }<br>
+ }<br>
+<br>
if (size.getBitWidth() > index.getBitWidth())<br>
index = index.sext(size.getBitWidth());<br>
else if (size.getBitWidth() < index.getBitWidth())<br>
size = size.sext(index.getBitWidth());<br>
<br>
- // Don't warn for valid indexes<br>
- if (index.slt(size))<br>
+ // For array subscripting the index must be less than size, but for pointer<br>
+ // arithmetic also allow the index (offset) to be equal to size since<br>
+ // computing the next address after the end of the array is legal and<br>
+ // commonly done e.g. in C++ iterators and range-based for loops.<br></blockquote><div><br></div><div>This doesn't seem to match the logic below, which appears (but perhaps I'm just misreading it) to allow one-past-the-end for code like "&foo[N]"... But maybe I've just misread the code below.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
+ if (AllowOnePastEnd ? index.sle(size) : index.slt(size))<br>
return;<br>
<br>
// Also don't warn for arrays of size 1 which are members of some<br>
// structure. These are often used to approximate flexible arrays in C89<br>
// code.<br>
- if (IsTailPaddedMemberArray(S, size, ND))<br>
+ if (IsTailPaddedMemberArray(*this, size, ND))<br>
return;<br>
<br>
- S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,<br>
- S.PDiag(diag::warn_array_index_exceeds_bounds)<br>
- << index.toString(10, true)<br>
- << size.toString(10, true)<br>
- << (unsigned)size.ugt(1)<br>
- << IndexExpr->getSourceRange());<br>
+ unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds;<br>
+ if (isSubscript)<br>
+ DiagID = diag::warn_array_index_exceeds_bounds;<br>
+<br>
+ DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,<br>
+ PDiag(DiagID) << index.toString(10, true)<br>
+ << size.toString(10, true)<br>
+ << (unsigned)size.getLimitedValue(~0U)<br>
+ << IndexExpr->getSourceRange());<br>
} else {<br>
- S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr,<br>
- S.PDiag(diag::warn_array_index_precedes_bounds)<br>
- << index.toString(10, true)<br>
- << IndexExpr->getSourceRange());<br>
+ unsigned DiagID = diag::warn_array_index_precedes_bounds;<br>
+ if (!isSubscript) {<br>
+ DiagID = diag::warn_ptr_arith_precedes_bounds;<br>
+ if (index.isNegative()) index = -index;<br>
+ }<br>
+<br>
+ DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr,<br>
+ PDiag(DiagID) << index.toString(10, true)<br>
+ << IndexExpr->getSourceRange());<br>
}<br>
<br>
if (ND)<br>
- S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,<br>
- S.PDiag(diag::note_array_index_out_of_bounds)<br>
- << ND->getDeclName());<br>
+ DiagRuntimeBehavior(ND->getLocStart(), BaseExpr,<br>
+ PDiag(diag::note_array_index_out_of_bounds)<br>
+ << ND->getDeclName());<br>
}<br>
<br>
void Sema::CheckArrayAccess(const Expr *expr) {<br>
- while (true) {<br>
- expr = expr->IgnoreParens();<br>
+ int AllowOnePastEnd = 0;<br>
+ while (expr) {<br>
+ expr = expr->IgnoreParenImpCasts();<br>
switch (expr->getStmtClass()) {<br>
- case Stmt::ArraySubscriptExprClass:<br>
- CheckArrayAccess_Check(*this, cast<ArraySubscriptExpr>(expr));<br>
+ case Stmt::ArraySubscriptExprClass: {<br>
+ const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(expr);<br>
+ CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true,<br>
+ AllowOnePastEnd > 0);<br>
return;<br>
+ }<br>
+ case Stmt::UnaryOperatorClass: {<br>
+ // Only unwrap the * and & unary operators<br>
+ const UnaryOperator *UO = cast<UnaryOperator>(expr);<br>
+ expr = UO->getSubExpr();<br>
+ switch (UO->getOpcode()) {<br>
+ case UO_AddrOf:<br>
+ AllowOnePastEnd++;<br>
+ break;<br>
+ case UO_Deref:<br>
+ AllowOnePastEnd--;<br>
+ break;<br>
+ default:<br>
+ return;<br>
+ }<br>
+ break;<br>
+ }<br>
case Stmt::ConditionalOperatorClass: {<br>
const ConditionalOperator *cond = cast<ConditionalOperator>(expr);<br>
if (const Expr *lhs = cond->getLHS())<br>
<br>
Modified: cfe/trunk/lib/Sema/SemaExpr.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)<br>
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Fri Aug 5 18:18:04 2011<br>
@@ -322,6 +322,7 @@<br>
// A glvalue of a non-function, non-array type T can be<br>
// converted to a prvalue.<br>
if (!E->isGLValue()) return Owned(E);<br>
+<br>
QualType T = E->getType();<br>
assert(!T.isNull() && "r-value conversion on typeless expression?");<br>
<br>
@@ -364,8 +365,6 @@<br>
// type of the lvalue.<br>
if (T.hasQualifiers())<br>
T = T.getUnqualifiedType();<br>
-<br>
- CheckArrayAccess(E);<br>
<br>
return Owned(ImplicitCastExpr::Create(Context, T, CK_LValueToRValue,<br>
E, 0, VK_RValue));<br>
@@ -3397,6 +3396,12 @@<br>
<br>
Arg = ArgExpr.takeAs<Expr>();<br>
}<br>
+<br>
+ // Check for array bounds violations for each argument to the call. This<br>
+ // check only triggers warnings when the argument isn't a more complex Expr<br>
+ // with its own checking, such as a BinaryOperator.<br>
+ CheckArrayAccess(Arg);<br>
+<br>
AllArgs.push_back(Arg);<br>
}<br>
<br>
@@ -5935,6 +5940,9 @@<br>
return QualType();<br>
}<br>
<br>
+ // Check array bounds for pointer arithemtic<br>
+ CheckArrayAccess(PExp, IExp);<br>
+<br>
if (CompLHSTy) {<br>
QualType LHSTy = Context.isPromotableBitField(lex.get());<br>
if (LHSTy.isNull()) {<br>
@@ -5991,6 +5999,12 @@<br>
if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get()))<br>
return QualType();<br>
<br>
+ Expr *IExpr = rex.get()->IgnoreParenCasts();<br>
+ UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue,<br></blockquote><div><br></div><div>Variables should be CamelCased. Also, maybe 'NegatedRHS' would be more clear? A bit unfortunate we're already using lex/rex here.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex;">
+ OK_Ordinary, IExpr->getExprLoc());<br>
+ // Check array bounds for pointer arithemtic<br>
+ CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex);<br>
+<br>
if (CompLHSTy) *CompLHSTy = lex.get()->getType();<br>
return lex.get()->getType();<br>
}<br>
@@ -6984,9 +6998,7 @@<br>
return QualType();<br>
<br>
CheckForNullPointerDereference(*this, LHS);<br>
- // Check for trivial buffer overflows.<br>
- CheckArrayAccess(LHS->IgnoreParenCasts());<br>
-<br>
+<br>
// C99 6.5.16p3: The type of an assignment expression is the type of the<br>
// left operand unless the left operand has qualified type, in which case<br>
// it is the unqualified version of the type of the left operand.<br>
@@ -7721,6 +7733,11 @@<br>
}<br>
if (ResultTy.isNull() || lhs.isInvalid() || rhs.isInvalid())<br>
return ExprError();<br>
+<br>
+ // Check for array bounds violations for both sides of the BinaryOperator<br>
+ CheckArrayAccess(lhs.get());<br>
+ CheckArrayAccess(rhs.get());<br>
+<br>
if (CompResultTy.isNull())<br>
return Owned(new (Context) BinaryOperator(lhs.take(), rhs.take(), Opc,<br>
ResultTy, VK, OK, OpLoc));<br>
@@ -8071,6 +8088,13 @@<br>
if (resultType.isNull() || Input.isInvalid())<br>
return ExprError();<br>
<br>
+ // Check for array bounds violations in the operand of the UnaryOperator,<br>
+ // except for the '*' and '&' operators that have to be handled specially<br>
+ // by CheckArrayAccess (as there are special cases like &array[arraysize]<br>
+ // that are explicitly defined as valid by the standard).<br>
+ if (Opc != UO_AddrOf && Opc != UO_Deref)<br>
+ CheckArrayAccess(Input.get());<br>
+<br>
return Owned(new (Context) UnaryOperator(Input.take(), Opc, resultType,<br>
VK, OK, OpLoc));<br>
}<br>
<br>
Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)<br>
+++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Fri Aug 5 18:18:04 2011<br>
@@ -2264,9 +2264,6 @@<br>
if (!From->isGLValue()) break;<br>
}<br>
<br>
- // Check for trivial buffer overflows.<br>
- CheckArrayAccess(From);<br>
-<br>
FromType = FromType.getUnqualifiedType();<br>
From = ImplicitCastExpr::Create(Context, FromType, CK_LValueToRValue,<br>
From, 0, VK_RValue);<br>
<br>
Modified: cfe/trunk/test/SemaCXX/array-bounds.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/array-bounds.cpp?rev=136997&r1=136996&r2=136997&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/array-bounds.cpp?rev=136997&r1=136996&r2=136997&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/test/SemaCXX/array-bounds.cpp (original)<br>
+++ cfe/trunk/test/SemaCXX/array-bounds.cpp Fri Aug 5 18:18:04 2011<br>
@@ -37,11 +37,16 @@<br>
s2.a[3] = 0; // no warning for 0-sized array<br>
<br>
union {<br>
- short a[2]; // expected-note {{declared here}}<br>
+ short a[2]; // expected-note 4 {{declared here}}<br>
char c[4];<br>
} u;<br>
u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}<br>
u.c[3] = 1; // no warning<br>
+ short *p = &u.a[2]; // no warning<br>
+ p = &u.a[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}<br>
+ *(&u.a[2]) = 1; // expected-warning {{array index of '2' indexes past the end of an array (that contains 2 elements)}}<br>
+ *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}}<br>
+ *(&u.c[3]) = 1; // no warning<br>
<br>
const int const_subscript = 3;<br>
int array[2]; // expected-note {{declared here}}<br>
@@ -153,8 +158,7 @@<br>
enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E };<br>
enum enumB { enumB_X, enumB_Y, enumB_Z };<br>
static enum enumB myVal = enumB_X;<br>
-void test_nested_switch()<br>
-{<br>
+void test_nested_switch() {<br>
switch (enumA_E) { // expected-warning {{no case matching constant}}<br>
switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and 'enumB_Z' not handled in switch}}<br>
case enumB_Y: ;<br>
@@ -199,3 +203,14 @@<br>
B->c[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}}<br>
}<br>
}<br>
+<br>
+void bar(int x) {}<br>
+int test_more() {<br>
+ int foo[5]; // expected-note 5 {{array 'foo' declared here}}<br>
+ bar(foo[5]); // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}<br>
+ ++foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}<br>
+ if (foo[6]) // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}}<br>
+ return --foo[6]; // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}}<br>
+ else<br>
+ return foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}}<br>
+}<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
</blockquote></div><br>