[clang] de25473 - [analyzer] Fix comparison logic in ArrayBoundCheckerV2
Donát Nagy via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 26 06:07:17 PDT 2023
Author: Donát Nagy
Date: 2023-04-26T15:02:23+02:00
New Revision: de2547329b41ad6ea4ea876d12731bde5a6b64c5
URL: https://github.com/llvm/llvm-project/commit/de2547329b41ad6ea4ea876d12731bde5a6b64c5
DIFF: https://github.com/llvm/llvm-project/commit/de2547329b41ad6ea4ea876d12731bde5a6b64c5.diff
LOG: [analyzer] Fix comparison logic in ArrayBoundCheckerV2
The prototype checker alpha.security.ArrayBoundV2 performs two
comparisons to check that in an expression like Array[Index]
0 <= Index < length(Array)
holds. These comparisons are handled by almost identical logic: the
inequality is first rearranged by getSimplifiedOffsets(), then evaluated
with evalBinOpNN().
However the simplification used "naive" elementary mathematical
schematics, but evalBinOpNN() performed the signed -> unsigned
conversions described in the C/C++ standards, and this confusion led to
wildly inaccurate results: false positives from the lower bound check
and false negatives from the upper bound check.
This commit eliminates the code duplication by moving the comparison
logic into a separate function, then adds an explicit check to this
unified code path, which handles the problematic case separately.
In addition to this, the commit also cleans up a testcase that was
demonstrating the presence of this problem. Note that while that
testcase was failing with an overflow error, its actual problem was in
the underflow handler logic:
(0) The testcase introduces a five-element array "char a[5]" and an
unknown argument "size_t len"; then evaluates "a[len+1]".
(1) The underflow check tries to determine whether "len+1 < 0" holds.
(2) This inequality is rearranged to "len < -1".
(3) evalBinOpNN() evaluates this with the schematics of C/C++ and
converts -1 to the size_t value SIZE_MAX.
(4) The engine concludes that len == SIZE_MAX, because otherwise we'd
have an underflow here.
(5) The overflow check tries to determine whether "len+1 >= 5".
(6) This inequality is rearranged to "len >= 4".
(7) The engine substitutes len == SIZE_MAX and reports that we have
an overflow.
Differential Revision: https://reviews.llvm.org/D135375
Added:
clang/test/Analysis/array-bound-v2-constraint-check.c
Modified:
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Removed:
clang/test/Analysis/out-of-bounds-false-positive.c
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 142577174ead8..a476613b6dcc7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -57,8 +57,8 @@ class RegionRawOffsetV2 {
: baseRegion(nullptr), byteOffset(UnknownVal()) {}
public:
- RegionRawOffsetV2(const SubRegion* base, SVal offset)
- : baseRegion(base), byteOffset(offset) {}
+ RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
+ : baseRegion(base), byteOffset(offset) { assert(base); }
NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
const SubRegion *getRegion() const { return baseRegion; }
@@ -72,19 +72,12 @@ class RegionRawOffsetV2 {
};
}
-static SVal computeExtentBegin(SValBuilder &svalBuilder,
- const MemRegion *region) {
- const MemSpaceRegion *SR = region->getMemorySpace();
- if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
- return UnknownVal();
- else
- return svalBuilder.makeZeroArrayIndex();
-}
-
// TODO: once the constraint manager is smart enough to handle non simplified
// symbolic expressions remove this function. Note that this can not be used in
// the constraint manager as is, since this does not handle overflows. It is
// safe to assume, however, that memory offsets will not overflow.
+// NOTE: callers of this function need to be aware of the effects of overflows
+// and signed<->unsigned conversions!
static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
SValBuilder &svalBuilder) {
@@ -117,6 +110,38 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
}
+// Evaluate the comparison Value < Threshold with the help of the custom
+// simplification algorithm defined for this checker. Return a pair of states,
+// where the first one corresponds to "value below threshold" and the second
+// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
+// the case when the evaluation fails.
+static std::pair<ProgramStateRef, ProgramStateRef>
+compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
+ SValBuilder &SVB) {
+ if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
+ std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
+ }
+ if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
+ QualType T = Value.getType(SVB.getContext());
+ if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
+ // In this case we reduced the bound check to a comparison of the form
+ // (symbol or value with unsigned type) < (negative number)
+ // which is always false. We are handling these cases separately because
+ // evalBinOpNN can perform a signed->unsigned conversion that turns the
+ // negative number into a huge positive value and leads to wildly
+ // inaccurate conclusions.
+ return {nullptr, State};
+ }
+ }
+ auto BelowThreshold =
+ SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
+
+ if (BelowThreshold)
+ return State->assume(*BelowThreshold);
+
+ return {nullptr, nullptr};
+}
+
void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
const Stmt* LoadS,
CheckerContext &checkerContext) const {
@@ -139,95 +164,55 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
if (!rawOffset.getRegion())
return;
- NonLoc rawOffsetVal = rawOffset.getByteOffset();
+ NonLoc ByteOffset = rawOffset.getByteOffset();
- // CHECK LOWER BOUND: Is byteOffset < extent begin?
- // If so, we are doing a load/store
- // before the first valid offset in the memory region.
-
- SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
-
- if (std::optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
- if (auto ConcreteNV = NV->getAs<nonloc::ConcreteInt>()) {
- std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
- getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteNV,
- svalBuilder);
- rawOffsetVal = simplifiedOffsets.first;
- *NV = simplifiedOffsets.second;
- }
+ // CHECK LOWER BOUND
+ const MemSpaceRegion *SR = rawOffset.getRegion()->getMemorySpace();
+ if (!llvm::isa<UnknownSpaceRegion>(SR)) {
+ // A pointer to UnknownSpaceRegion may point to the middle of
+ // an allocated region.
- SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
- svalBuilder.getConditionType());
+ auto [state_precedesLowerBound, state_withinLowerBound] =
+ compareValueToThreshold(state, ByteOffset,
+ svalBuilder.makeZeroArrayIndex(), svalBuilder);
- std::optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
- if (!lowerBoundToCheck)
- return;
-
- ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
- std::tie(state_precedesLowerBound, state_withinLowerBound) =
- state->assume(*lowerBoundToCheck);
-
- // Are we constrained enough to definitely precede the lower bound?
if (state_precedesLowerBound && !state_withinLowerBound) {
+ // We know that the index definitely precedes the lower bound.
reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
return;
}
- // Otherwise, assume the constraint of the lower bound.
- assert(state_withinLowerBound);
- state = state_withinLowerBound;
+ if (state_withinLowerBound)
+ state = state_withinLowerBound;
}
- do {
- // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so,
- // we are doing a load/store after the last valid offset.
- const MemRegion *MR = rawOffset.getRegion();
- DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
- if (!isa<NonLoc>(Size))
- break;
-
- if (auto ConcreteSize = Size.getAs<nonloc::ConcreteInt>()) {
- std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
- getSimplifiedOffsets(rawOffset.getByteOffset(), *ConcreteSize,
- svalBuilder);
- rawOffsetVal = simplifiedOffsets.first;
- Size = simplifiedOffsets.second;
- }
-
- SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
- Size.castAs<NonLoc>(),
- svalBuilder.getConditionType());
-
- std::optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
- if (!upperboundToCheck)
- break;
-
- ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
- std::tie(state_exceedsUpperBound, state_withinUpperBound) =
- state->assume(*upperboundToCheck);
-
- // If we are under constrained and the index variables are tainted, report.
- if (state_exceedsUpperBound && state_withinUpperBound) {
- SVal ByteOffset = rawOffset.getByteOffset();
+ // CHECK UPPER BOUND
+ DefinedOrUnknownSVal Size =
+ getDynamicExtent(state, rawOffset.getRegion(), svalBuilder);
+ if (auto KnownSize = Size.getAs<NonLoc>()) {
+ auto [state_withinUpperBound, state_exceedsUpperBound] =
+ compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
+
+ if (state_exceedsUpperBound) {
+ if (!state_withinUpperBound) {
+ // We know that the index definitely exceeds the upper bound.
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+ return;
+ }
if (isTainted(state, ByteOffset)) {
+ // Both cases are possible, but the index is tainted, so report.
reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
return;
}
- } else if (state_exceedsUpperBound) {
- // If we are constrained enough to definitely exceed the upper bound,
- // report.
- assert(!state_withinUpperBound);
- reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
- return;
}
- assert(state_withinUpperBound);
- state = state_withinUpperBound;
+ if (state_withinUpperBound)
+ state = state_withinUpperBound;
}
- while (false);
checkerContext.addTransition(state);
}
+
void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
ProgramStateRef errorState,
SVal TaintedSVal) const {
@@ -336,9 +321,8 @@ RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
switch (region->getKind()) {
default: {
if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
- offset = getValue(offset, svalBuilder);
- if (!offset.isUnknownOrUndef())
- return RegionRawOffsetV2(subReg, offset);
+ if (auto Offset = getValue(offset, svalBuilder).getAs<NonLoc>())
+ return RegionRawOffsetV2(subReg, *Offset);
}
return RegionRawOffsetV2();
}
diff --git a/clang/test/Analysis/out-of-bounds-false-positive.c b/clang/test/Analysis/array-bound-v2-constraint-check.c
similarity index 94%
rename from clang/test/Analysis/out-of-bounds-false-positive.c
rename to clang/test/Analysis/array-bound-v2-constraint-check.c
index 2c63f0791ecb2..dc138196b56a9 100644
--- a/clang/test/Analysis/out-of-bounds-false-positive.c
+++ b/clang/test/Analysis/array-bound-v2-constraint-check.c
@@ -8,12 +8,11 @@ typedef unsigned long long size_t;
const char a[] = "abcd"; // extent: 5 bytes
void symbolic_size_t_and_int0(size_t len) {
- // FIXME: Should not warn for this.
- (void)a[len + 1]; // expected-warning {{Out of bound memory access}}
+ (void)a[len + 1]; // no-warning
// We infered that the 'len' must be in a specific range to make the previous indexing valid.
// len: [0,3]
- clang_analyzer_eval(len <= 3); // expected - warning {{TRUE}}
- clang_analyzer_eval(len <= 2); // expected - warning {{UNKNOWN}}
+ clang_analyzer_eval(len <= 3); // expected-warning {{TRUE}}
+ clang_analyzer_eval(len <= 2); // expected-warning {{UNKNOWN}}
}
void symbolic_size_t_and_int1(size_t len) {
More information about the cfe-commits
mailing list