[clang-tools-extra] [clang] [llvm] [analyzer] Support interestingness in ArrayBoundV2 (PR #78315)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 22 07:00:12 PST 2024
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>,
=?utf-8?q?Donát?= Nagy <donat.nagy at ericsson.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/78315 at github.com>
https://github.com/NagyDonat updated https://github.com/llvm/llvm-project/pull/78315
>From c75c05c6e894a46797913c5bdccb240cbcc01ae9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Tue, 12 Dec 2023 13:07:54 +0100
Subject: [PATCH 1/4] [analyzer] Support interestingness in ArrayBoundV2
This commit improves alpha.security.ArrayBoundV2 in two connected areas:
(1) It calls `markInteresting()` on the symbolic values that are
responsible for the out of bounds access.
(2) Its index-is-in-bounds assumptions are reported in note tags if they
provide information about the value of an interesting symbol.
This commit is limited to "display" changes: it introduces new
diagnostic pieces (potentially to bugs found by other checkers), but the
ArrayBoundV2 will make the same assumptions and detect the same bugs
before and after this change.
As a minor unrelated change, this commit also updates/removes some very
old comments which became obsolate due to my previous changes.
---
.../Checkers/ArrayBoundCheckerV2.cpp | 345 ++++++++++++++----
.../test/Analysis/out-of-bounds-diagnostics.c | 10 +
clang/test/Analysis/out-of-bounds-notes.c | 128 +++++++
3 files changed, 413 insertions(+), 70 deletions(-)
create mode 100644 clang/test/Analysis/out-of-bounds-notes.c
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 6c7a1601402efa..4241fa3a2ce8af 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -33,7 +33,66 @@ using namespace taint;
using llvm::formatv;
namespace {
-enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
+class StateUpdateReporter {
+ const SubRegion *Reg;
+ NonLoc ByteOffsetVal;
+ std::optional<QualType> ElementType = std::nullopt;
+ std::optional<int64_t> ElementSize = std::nullopt;
+ bool AssumedNonNegative = false;
+ std::optional<NonLoc> AssumedUpperBound = std::nullopt;
+
+public:
+ StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
+ CheckerContext &C)
+ : Reg(R), ByteOffsetVal(ByteOffsVal) {
+ initializeElementInfo(E, C);
+ }
+
+ void initializeElementInfo(const Expr *E, CheckerContext &C) {
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+ const MemRegion *SubscriptBaseReg =
+ C.getSVal(ASE->getBase()).getAsRegion();
+ if (!SubscriptBaseReg)
+ return;
+ SubscriptBaseReg = SubscriptBaseReg->StripCasts();
+ if (!isa_and_nonnull<ElementRegion>(SubscriptBaseReg)) {
+ ElementType = ASE->getType();
+ ElementSize =
+ C.getASTContext().getTypeSizeInChars(*ElementType).getQuantity();
+ }
+ }
+ }
+ void recordNonNegativeAssumption() { AssumedNonNegative = true; }
+ void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
+ AssumedUpperBound = UpperBoundVal;
+ }
+
+ const NoteTag *createNoteTag(CheckerContext &C) const;
+
+private:
+ std::string getMessage(PathSensitiveBugReport &BR) const;
+
+ /// Return true if information about the value of `Sym` can put constraints
+ /// on some symbol which is interesting within the bug report `BR`.
+ /// In particular, this returns true when `Sym` is interesting within `BR`;
+ /// but it also returns true if `Sym` is an expression that contains integer
+ /// constants and a single symbolic operand which is interesting (in `BR`).
+ /// We need to use this instead of plain `BR.isInteresting()` because if we
+ /// are analyzing code like
+ /// int array[10];
+ /// int f(int arg) {
+ /// return array[arg] && array[arg + 10];
+ /// }
+ /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
+ /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
+ /// to detect this out of bounds access).
+ static bool providesInformationAboutInteresting(SymbolRef Sym,
+ PathSensitiveBugReport &BR);
+ static bool providesInformationAboutInteresting(SVal SV,
+ PathSensitiveBugReport &BR) {
+ return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
+ }
+};
struct Messages {
std::string Short, Full;
@@ -54,11 +113,14 @@ class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>,
void performCheck(const Expr *E, CheckerContext &C) const;
- void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
- NonLoc Offset, Messages Msgs) const;
+ void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
+ NonLoc Offset, bool IsTaintBug = false) const;
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
+ static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
+ NonLoc Offset, NonLoc Limit,
+ CheckerContext &C);
static bool isInAddressOf(const Stmt *S, ASTContext &AC);
public:
@@ -133,12 +195,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
return std::nullopt;
}
-// TODO: once the constraint manager is smart enough to handle non simplified
-// symbolic expressions remove this function. Note that this can not be used in
-// the constraint manager as is, since this does not handle overflows. It is
-// safe to assume, however, that memory offsets will not overflow.
-// NOTE: callers of this function need to be aware of the effects of overflows
-// and signed<->unsigned conversions!
+// NOTE: This function is the "heart" of this checker. It simplifies
+// inequalities with transformations that are valid (and very elementary) in
+// pure mathematics, but become invalid if we use them in C++ number model
+// where the calculations may overflow.
+// Due to the overflow issues I think it's impossible (or at least not
+// practical) to integrate this kind of simplification into the resolution of
+// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
+// produces valid results if the arguments are memory offsets which are known
+// to be << SIZE_MAX.
+// TODO: This algorithm should be moved to a central location where it's
+// available for other checkers that need to compare memory offsets.
+// NOTE: When using the results of this function, don't forget that `evalBinOp`
+// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`!
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
SValBuilder &svalBuilder) {
@@ -239,13 +308,8 @@ static std::optional<int64_t> getConcreteValue(NonLoc SV) {
return std::nullopt;
}
-static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
- static const char *ShortMsgTemplates[] = {
- "Out of bound access to memory preceding {0}",
- "Out of bound access to memory after the end of {0}",
- "Potential out of bound access to {0} with tainted offset"};
-
- return formatv(ShortMsgTemplates[Kind], RegName);
+static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
+ return SV ? getConcreteValue(*SV) : std::nullopt;
}
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
@@ -255,7 +319,28 @@ static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
Out << "Access of " << RegName << " at negative byte offset";
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
Out << ' ' << ConcreteIdx->getValue();
- return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)};
+ return {formatv("Out of bound access to memory preceding {0}", RegName),
+ std::string(Buf)};
+}
+
+/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
+/// it can be performed (`Divisor` is nonzero and there is no remainder). The
+/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
+/// division is considered to be successful.
+bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2,
+ int64_t Divisor) {
+ if (!Divisor)
+ return false;
+ const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
+ const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
+ if (!Val1HasRemainder && !Val2HasRemainder) {
+ if (Val1)
+ *Val1 /= Divisor;
+ if (Val2)
+ *Val2 /= Divisor;
+ return true;
+ }
+ return false;
}
static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
@@ -268,18 +353,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
- bool UseByteOffsets = true;
- if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
- const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
- const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
- if (!OffsetHasRemainder && !ExtentHasRemainder) {
- UseByteOffsets = false;
- if (OffsetN)
- *OffsetN /= ElemSize;
- if (ExtentN)
- *ExtentN /= ElemSize;
- }
- }
+ int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
+
+ bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
@@ -307,7 +383,9 @@ static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region,
Out << "s";
}
- return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)};
+ return {
+ formatv("Out of bound access to memory after the end of {0}", RegName),
+ std::string(Buf)};
}
static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
@@ -318,17 +396,87 @@ static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) {
RegName, OffsetName)};
}
-void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
- // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
- // some new logic here that reasons directly about memory region extents.
- // Once that logic is more mature, we can bring it back to assumeInBound()
- // for all clients to use.
- //
- // The algorithm we are using here for bounds checking is to see if the
- // memory access is within the extent of the base region. Since we
- // have some flexibility in defining the base region, we can achieve
- // various levels of conservatism in our buffer overflow checking.
+const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
+ // Don't create a note tag if we didn't assume anything:
+ if (!AssumedNonNegative && !AssumedUpperBound)
+ return nullptr;
+
+ return C.getNoteTag(
+ [*this](PathSensitiveBugReport &BR) -> std::string {
+ return getMessage(BR);
+ },
+ /*isPrunable=*/false);
+}
+
+std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
+ bool ShouldReportNonNegative = AssumedNonNegative;
+ if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
+ if (AssumedUpperBound &&
+ providesInformationAboutInteresting(*AssumedUpperBound, BR))
+ ShouldReportNonNegative = false;
+ else
+ return "";
+ }
+
+ std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
+ std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
+ const bool UseIndex =
+ ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
+
+ SmallString<256> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Assuming ";
+ if (UseIndex) {
+ Out << "index ";
+ if (OffsetN)
+ Out << "'" << OffsetN << "' ";
+ } else if (AssumedUpperBound) {
+ Out << "byte offset ";
+ if (OffsetN)
+ Out << "'" << OffsetN << "' ";
+ } else {
+ Out << "offset ";
+ }
+
+ Out << "is";
+ if (ShouldReportNonNegative) {
+ Out << " non-negative";
+ }
+ if (AssumedUpperBound) {
+ if (ShouldReportNonNegative)
+ Out << " and";
+ Out << " less than ";
+ if (ExtentN)
+ Out << *ExtentN << ", ";
+ if (UseIndex && ElementType)
+ Out << "the number of '" << ElementType->getAsString()
+ << "' elements in ";
+ else
+ Out << "the extent of ";
+ Out << getRegionName(Reg);
+ }
+ return std::string(Out.str());
+}
+
+bool StateUpdateReporter::providesInformationAboutInteresting(
+ SymbolRef Sym, PathSensitiveBugReport &BR) {
+ if (!Sym)
+ return false;
+ for (SymbolRef PartSym : Sym->symbols()) {
+ // The interestingess mark may appear on any layer as we're stripping off
+ // the SymIntExpr, UnarySymExpr etc. layers...
+ if (BR.isInteresting(PartSym))
+ return true;
+ // ...but if both sides of the expression are symbolic (i.e. unknown), then
+ // the analyzer can't use the combined result to constrain the operands.
+ if (isa<SymSymExpr>(PartSym))
+ return false;
+ }
+ return false;
+}
+
+void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
const SVal Location = C.getSVal(E);
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
@@ -350,6 +498,10 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
auto [Reg, ByteOffset] = *RawOffset;
+ // The state updates will be reported as a single note tag, which will be
+ // composed by this helper class.
+ StateUpdateReporter SUR(Reg, ByteOffset, E, C);
+
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace();
if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
@@ -363,13 +515,22 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
- if (PrecedesLowerBound && !WithinLowerBound) {
- // We know that the index definitely precedes the lower bound.
- Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
- reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs);
- return;
+ if (PrecedesLowerBound) {
+ // The offset may be invalid (negative)...
+ if (!WithinLowerBound) {
+ // ...and it cannot be valid (>= 0), so report an error.
+ Messages Msgs = getPrecedesMsgs(Reg, ByteOffset);
+ reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset);
+ return;
+ }
+ // ...but it can be valid as well, so the checker will (optimistically)
+ // assume that it's valid and mention this in the note tag.
+ SUR.recordNonNegativeAssumption();
}
+ // Actually update the state. The "if" only fails in the extremely unlikely
+ // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+ // evalBinOpNN fails to evaluate the less-than operator.
if (WithinLowerBound)
State = WithinLowerBound;
}
@@ -381,32 +542,30 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
if (ExceedsUpperBound) {
+ // The offset may be invalid (>= Size)...
if (!WithinUpperBound) {
- // We know that the index definitely exceeds the upper bound.
- if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
- // ...but this is within an addressof expression, so we need to check
- // for the exceptional case that `&array[size]` is valid.
- auto [EqualsToThreshold, NotEqualToThreshold] =
- compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize,
- SVB, /*CheckEquality=*/true);
- if (EqualsToThreshold && !NotEqualToThreshold) {
- // We are definitely in the exceptional case, so return early
- // instead of reporting a bug.
- C.addTransition(EqualsToThreshold);
- return;
- }
+ // ...and it cannot be within bounds, so report an error, unless we can
+ // definitely determine that this is an idiomatic `&array[size]`
+ // expression that calculates the past-the-end pointer.
+ if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
+ *KnownSize, C)) {
+ // FIXME: this duplicates the `addTransition` at the end of the
+ // function, but `goto` is taboo nowdays.
+ C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
+ return;
}
+
Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset,
*KnownSize, Location);
- reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs);
+ reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset);
return;
}
+ // ...and it can be valid as well...
if (isTainted(State, ByteOffset)) {
- // Both cases are possible, but the offset is tainted, so report.
- std::string RegName = getRegionName(Reg);
+ // ...but it's tainted, so report an error.
- // Diagnostic detail: "tainted offset" is always correct, but the
- // common case is that 'idx' is tainted in 'arr[idx]' and then it's
+ // Diagnostic detail: saying "tainted offset" is always correct, but
+ // the common case is that 'idx' is tainted in 'arr[idx]' and then it's
// nicer to say "tainted index".
const char *OffsetName = "offset";
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
@@ -414,33 +573,67 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
OffsetName = "index";
Messages Msgs = getTaintMsgs(Reg, OffsetName);
- reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs);
+ reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, /*IsTaintBug=*/true);
return;
}
+ // ...and it isn't tainted, so the checker will (optimistically) assume
+ // that the offset is in bounds and mention this in the note tag.
+ SUR.recordUpperBoundAssumption(*KnownSize);
}
+ // Actually update the state. The "if" only fails in the extremely unlikely
+ // case when compareValueToThreshold returns {nullptr, nullptr} becasue
+ // evalBinOpNN fails to evaluate the less-than operator.
if (WithinUpperBound)
State = WithinUpperBound;
}
- C.addTransition(State);
+ // Add a transition, reporting the state updates that we accumulated.
+ C.addTransition(State, SUR.createNoteTag(C));
}
void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
- ProgramStateRef ErrorState, OOB_Kind Kind,
- NonLoc Offset, Messages Msgs) const {
+ ProgramStateRef ErrorState, Messages Msgs,
+ NonLoc Offset, bool IsTaintBug) const {
ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
if (!ErrorNode)
return;
auto BR = std::make_unique<PathSensitiveBugReport>(
- Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
+ IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode);
+
+ // FIXME: ideally we would just call trackExpressionValue() and that would
+ // "do the right thing": mark the relevant symbols as interesting, track the
+ // control dependencies and statements storing the relevant values and add
+ // helpful diagnostic pieces. However, right now trackExpressionValue() is
+ // a heap of unreliable heuristics, so it would cause several issues:
+ // - Interestingness is not applied consistently, e.g. if `array[x+10]`
+ // causes an overflow, then `x` is not marked as interesting.
+ // - We get irrelevant diagnostic pieces, e.g. in the code
+ // `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;`
+ // it places a "Storing uninitialized value" note on the `malloc` call
+ // (which is technically true, but irrelevant).
+ // If trackExpressionValue() becomes reliable, it should be applied instead
+ // of the manual markInteresting() calls.
+
+ if (SymbolRef OffsetSym = Offset.getAsSymbol()) {
+ // If the offset is a symbolic value, iterate over its "parts" with
+ // `SymExpr::symbols()` and mark each of them as interesting.
+ // For example, if the offset is `x*4 + y` then we put interestingness onto
+ // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols
+ // `x` and `y`.
+ for (SymbolRef PartSym : OffsetSym->symbols())
+ BR->markInteresting(PartSym);
+ }
- // Track back the propagation of taintedness.
- if (Kind == OOB_Taint)
+ if (IsTaintBug) {
+ // If the issue that we're reporting depends on the taintedness of the
+ // offset, then put interestingness onto symbols that could be the origin
+ // of the taint.
for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
BR->markInteresting(Sym);
+ }
C.emitReport(std::move(BR));
}
@@ -476,6 +669,18 @@ bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
}
+bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E,
+ ProgramStateRef State,
+ NonLoc Offset, NonLoc Limit,
+ CheckerContext &C) {
+ if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
+ auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
+ State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);
+ return EqualsToThreshold && !NotEqualToThreshold;
+ }
+ return false;
+}
+
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
}
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 769a8954f79669..7a8f59650fef16 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -195,12 +195,22 @@ void *malloc(size_t size);
int *mallocRegion(void) {
int *mem = (int*)malloc(2*sizeof(int));
+
mem[3] = -2;
// expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}
// expected-note at -2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}}
return mem;
}
+int *mallocRegionDeref(void) {
+ int *mem = (int*)malloc(2*sizeof(int));
+
+ *(mem + 3) = -2;
+ // expected-warning at -1 {{Out of bound access to memory after the end of the heap area}}
+ // expected-note at -2 {{Access of the heap area at index 3, while it holds only 2 'int' elements}}
+ return mem;
+}
+
void *alloca(size_t size);
int allocaRegion(void) {
diff --git a/clang/test/Analysis/out-of-bounds-notes.c b/clang/test/Analysis/out-of-bounds-notes.c
new file mode 100644
index 00000000000000..290df79f3e18a4
--- /dev/null
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -0,0 +1,128 @@
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \
+// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection -verify %s
+
+int array[10];
+
+void clang_analyzer_value(unsigned arg);
+
+int irrelevantAssumptions(int arg) {
+ int a = array[arg];
+ // Here the analyzer assumes that `arg` is in bounds, but doesn't report this
+ // because `arg` is not interesting for the bug.
+ int b = array[13];
+ // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+ // expected-note at -2 {{Access of 'array' at index 13, while it holds only 10 'int' elements}}
+ return a + b;
+}
+
+
+int assumingBoth(int arg) {
+ int a = array[arg];
+ // expected-note at -1 {{Assuming index is non-negative and less than 10, the number of 'int' elements in 'array'}}
+ int b = array[arg]; // no additional note, we already assumed that 'arg' is in bounds
+ int c = array[arg + 10];
+ // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+ // expected-note at -2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}}
+ return a + b + c;
+}
+
+int assumingLower(int arg) {
+ // expected-note at +2 {{Assuming 'arg' is < 10}}
+ // expected-note at +1 {{Taking false branch}}
+ if (arg >= 10)
+ return 0;
+ int a = array[arg];
+ // expected-note at -1 {{Assuming index is non-negative}}
+ int b = array[arg + 10];
+ // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+ // expected-note at -2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}}
+ return a + b;
+}
+
+int assumingUpper(int arg) {
+ // expected-note at +2 {{Assuming 'arg' is >= 0}}
+ // expected-note at +1 {{Taking false branch}}
+ if (arg < 0)
+ return 0;
+ int a = array[arg];
+ // expected-note at -1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}}
+ int b = array[arg - 10];
+ // expected-warning at -1 {{Out of bound access to memory preceding 'array'}}
+ // expected-note at -2 {{Access of 'array' at negative byte offset}}
+ return a + b;
+}
+
+int assumingUpperIrrelevant(int arg) {
+ // FIXME: The assumption "assuming index is less than 10" is printed because
+ // it's assuming something about the interesting variable `arg`; however,
+ // it's irrelevant because in this testcase the out of bound access is
+ // deduced from the _lower_ bound on `arg`. Currently the analyzer cannot
+ // filter out assumptions that are logically irrelevant but "touch"
+ // interesting symbols; eventually it would be good to add support for this.
+
+ // expected-note at +2 {{Assuming 'arg' is >= 0}}
+ // expected-note at +1 {{Taking false branch}}
+ if (arg < 0)
+ return 0;
+ int a = array[arg];
+ // expected-note at -1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}}
+ int b = array[arg + 10];
+ // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+ // expected-note at -2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}}
+ return a + b;
+}
+
+int assumingUpperUnsigned(unsigned arg) {
+ int a = array[arg];
+ // expected-note at -1 {{Assuming index is less than 10, the number of 'int' elements in 'array'}}
+ int b = array[(int)arg - 10];
+ // expected-warning at -1 {{Out of bound access to memory preceding 'array'}}
+ // expected-note at -2 {{Access of 'array' at negative byte offset}}
+ return a + b;
+}
+
+int assumingNothing(unsigned arg) {
+ // expected-note at +2 {{Assuming 'arg' is < 10}}
+ // expected-note at +1 {{Taking false branch}}
+ if (arg >= 10)
+ return 0;
+ int a = array[arg]; // no note here, we already know that 'arg' is in bounds
+ int b = array[(int)arg - 10];
+ // expected-warning at -1 {{Out of bound access to memory preceding 'array'}}
+ // expected-note at -2 {{Access of 'array' at negative byte offset}}
+ return a + b;
+}
+
+short assumingConverted(int arg) {
+ // When indices are reported, the note will use the element type that's the
+ // result type of the subscript operator.
+ char *cp = (char*)array;
+ char a = cp[arg];
+ // expected-note at -1 {{Assuming index is non-negative and less than 40, the number of 'char' elements in 'array'}}
+ char b = cp[arg]; // no additional note, we already assumed that 'arg' is in bounds
+ char c = cp[arg + 40];
+ // expected-warning at -1 {{Out of bound access to memory after the end of 'array'}}
+ // expected-note at -2 {{Access of 'array' at an overflowing index, while it holds only 40 'char' elements}}
+ return a + b + c;
+}
+
+struct foo {
+ int num;
+ char a[8];
+ char b[5];
+};
+
+int assumingConverted2(struct foo f, int arg) {
+ // When indices are reported, the note will use the element type that's the
+ // result type of the subscript operator.
+ int a = ((int*)(f.a))[arg];
+ // expected-note at -1 {{Assuming index is non-negative and less than 2, the number of 'int' elements in 'f.a'}}
+ // However, if the extent of the memory region is not divisible by the
+ // element size, the checker measures the offset and extent in bytes.
+ int b = ((int*)(f.b))[arg];
+ // expected-note at -1 {{Assuming byte offset is less than 5, the extent of 'f.b'}}
+ int c = array[arg-2];
+ // expected-warning at -1 {{Out of bound access to memory preceding 'array'}}
+ // expected-note at -2 {{Access of 'array' at negative byte offset}}
+ return a + b + c;
+}
>From c0a09db0fbb48626bbb069f85a587d2c94a8998a Mon Sep 17 00:00:00 2001
From: NagyDonat <donat.nagy at ericsson.com>
Date: Fri, 19 Jan 2024 19:10:54 +0100
Subject: [PATCH 2/4] Remove unused reference to debug.ExprInspection
---
clang/test/Analysis/out-of-bounds-notes.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/test/Analysis/out-of-bounds-notes.c b/clang/test/Analysis/out-of-bounds-notes.c
index 290df79f3e18a4..3070ab643d832e 100644
--- a/clang/test/Analysis/out-of-bounds-notes.c
+++ b/clang/test/Analysis/out-of-bounds-notes.c
@@ -1,10 +1,8 @@
// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-output=text \
-// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint,debug.ExprInspection -verify %s
+// RUN: -analyzer-checker=core,alpha.security.ArrayBoundV2,unix.Malloc,alpha.security.taint -verify %s
int array[10];
-void clang_analyzer_value(unsigned arg);
-
int irrelevantAssumptions(int arg) {
int a = array[arg];
// Here the analyzer assumes that `arg` is in bounds, but doesn't report this
>From 18c00f79123ed8b22feefdb3b4bcc2f08b59c79b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 22 Jan 2024 15:26:57 +0100
Subject: [PATCH 3/4] Improve comments (as suggested on code review)
---
.../Checkers/ArrayBoundCheckerV2.cpp | 30 +++++++++++--------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 4241fa3a2ce8af..593095a2bebafe 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -202,12 +202,19 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
// Due to the overflow issues I think it's impossible (or at least not
// practical) to integrate this kind of simplification into the resolution of
// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
-// produces valid results if the arguments are memory offsets which are known
-// to be << SIZE_MAX.
+// produces valid results when the calculations are handling memory offsets
+// and every value is well below SIZE_MAX.
// TODO: This algorithm should be moved to a central location where it's
// available for other checkers that need to compare memory offsets.
-// NOTE: When using the results of this function, don't forget that `evalBinOp`
-// uses the evaluation rules of C++, so e.g. `(size_t)123 < -1`!
+// NOTE: the simplification preserves the order of the two operands in a
+// mathematical sense, but it may change the result produced by a C++
+// comparison operator (and the automatic type conversions).
+// For example, consider a comparison "X+1 < 0", where the LHS is stored as a
+// size_t and the RHS is stored in an int. (As size_t is unsigned, this
+// comparison is false for all values of "X".) However, the simplification may
+// turn it into "X < -1", which is still always false in a mathematical sense,
+// but can produce a true result when evaluated by `evalBinOp` (which follows
+// the rules of C++ and casts -1 to SIZE_MAX).
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
SValBuilder &svalBuilder) {
@@ -401,11 +408,9 @@ const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
if (!AssumedNonNegative && !AssumedUpperBound)
return nullptr;
- return C.getNoteTag(
- [*this](PathSensitiveBugReport &BR) -> std::string {
- return getMessage(BR);
- },
- /*isPrunable=*/false);
+ return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
+ return getMessage(BR);
+ });
}
std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
@@ -468,8 +473,9 @@ bool StateUpdateReporter::providesInformationAboutInteresting(
// the SymIntExpr, UnarySymExpr etc. layers...
if (BR.isInteresting(PartSym))
return true;
- // ...but if both sides of the expression are symbolic (i.e. unknown), then
- // the analyzer can't use the combined result to constrain the operands.
+ // ...but if both sides of the expression are symbolic, then there is no
+ // practical algorithm to produce separate constraints for the two
+ // operands (from the single combined result).
if (isa<SymSymExpr>(PartSym))
return false;
}
@@ -549,8 +555,6 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
// expression that calculates the past-the-end pointer.
if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
*KnownSize, C)) {
- // FIXME: this duplicates the `addTransition` at the end of the
- // function, but `goto` is taboo nowdays.
C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
return;
}
>From 6ed41d51078796983983b777af6067056de78299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 22 Jan 2024 15:56:30 +0100
Subject: [PATCH 4/4] Make a function 'static'
---
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 593095a2bebafe..e6f0cc0cbf0dd3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -334,8 +334,8 @@ static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
/// division is considered to be successful.
-bool tryDividePair(std::optional<int64_t> &Val1, std::optional<int64_t> &Val2,
- int64_t Divisor) {
+static bool tryDividePair(std::optional<int64_t> &Val1,
+ std::optional<int64_t> &Val2, int64_t Divisor) {
if (!Divisor)
return false;
const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
More information about the cfe-commits
mailing list