[clang] aceb34c - [analyzer][NFC] Simplifications in ArrayBoundV2 (#67572)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 24 01:44:17 PDT 2023
Author: DonatNagyE
Date: 2023-10-24T10:44:13+02:00
New Revision: aceb34c7046d315d615feaa94a5941db13299a1b
URL: https://github.com/llvm/llvm-project/commit/aceb34c7046d315d615feaa94a5941db13299a1b
DIFF: https://github.com/llvm/llvm-project/commit/aceb34c7046d315d615feaa94a5941db13299a1b.diff
LOG: [analyzer][NFC] Simplifications in ArrayBoundV2 (#67572)
I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2`
but before that I'm refactoring the source code to clean up some
over-complicated code and an inaccurate comment.
Changes in this commit:
- Remove the `mutable std::unique_ptr<BugType>` boilerplate, because
it's no longer needed.
- Remove the code duplication between the methods `reportOOB()` and
`reportTaintedOOB()`.
- Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent
the wheel" version of `std::pair` and it was used only once, as a
temporary object that was immediately decomposed. (I suspect that
`RegionRawOffset` in MemRegion.cpp could also be eliminated.)
- Flatten the code of `computeOffset()` which had contained six nested
indentation levels before this commit.
- Ensure that `computeOffset()` returns `std::nullopt` instead of a
`{Region, <zero array index>}` pair in the case when it encounters a
`Location` that is not an `ElementRegion`. This ensures that the
`checkLocation` callback returns early when it handles a memory access
where it has "nothing to do" (no subscript operation or equivalent
pointer arithmetic). Note that this is still NFC because zero is a
valid index everywhere, so the old logic without this shortcut
eventually reached the same conclusion.
- Correct a wrong explanation comment in `getSimplifiedOffsets()`.
Added:
Modified:
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index db4a2fcea9b2cdd..e80a06e38587520 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,15 +32,13 @@ using namespace taint;
namespace {
class ArrayBoundCheckerV2 :
public Checker<check::Location> {
- mutable std::unique_ptr<BugType> BT;
- mutable std::unique_ptr<BugType> TaintBT;
+ BugType BT{this, "Out-of-bound access"};
+ BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
- enum OOB_Kind { OOB_Precedes, OOB_Excedes };
+ enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
- void reportOOB(CheckerContext &C, ProgramStateRef errorState,
- OOB_Kind kind) const;
- void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
- SVal TaintedSVal) const;
+ void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
+ SVal TaintedSVal = UnknownVal()) const;
static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
@@ -48,26 +46,62 @@ class ArrayBoundCheckerV2 :
void checkLocation(SVal l, bool isLoad, const Stmt *S,
CheckerContext &C) const;
};
+} // anonymous namespace
-// FIXME: Eventually replace RegionRawOffset with this class.
-class RegionRawOffsetV2 {
-private:
- const SubRegion *baseRegion;
- NonLoc byteOffset;
-
-public:
- RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
- : baseRegion(base), byteOffset(offset) { assert(base); }
+/// For a given Location that can be represented as a symbolic expression
+/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
+/// Arr and the distance of Location from the beginning of Arr (expressed in a
+/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
+/// cannot be determined.
+std::optional<std::pair<const SubRegion *, NonLoc>>
+computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
+ QualType T = SVB.getArrayIndexType();
+ auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
+ // We will use this utility to add and multiply values.
+ return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
+ };
- NonLoc getByteOffset() const { return byteOffset; }
- const SubRegion *getRegion() const { return baseRegion; }
+ const SubRegion *OwnerRegion = nullptr;
+ std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
+
+ const ElementRegion *CurRegion =
+ dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
+
+ while (CurRegion) {
+ const auto Index = CurRegion->getIndex().getAs<NonLoc>();
+ if (!Index)
+ return std::nullopt;
+
+ QualType ElemType = CurRegion->getElementType();
+
+ // FIXME: The following early return was presumably added to safeguard the
+ // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
+ // it seems that `ElemType` cannot be incomplete at this point.
+ if (ElemType->isIncompleteType())
+ return std::nullopt;
+
+ // Calculate Delta = Index * sizeof(ElemType).
+ NonLoc Size = SVB.makeArrayIndex(
+ SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+ auto Delta = EvalBinOp(BO_Mul, *Index, Size);
+ if (!Delta)
+ return std::nullopt;
+
+ // Perform Offset += Delta.
+ Offset = EvalBinOp(BO_Add, *Offset, *Delta);
+ if (!Offset)
+ return std::nullopt;
+
+ OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
+ // When this is just another ElementRegion layer, we need to continue the
+ // offset calculations:
+ CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
+ }
- static std::optional<RegionRawOffsetV2>
- computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
+ if (OwnerRegion)
+ return std::make_pair(OwnerRegion, *Offset);
- void dump() const;
- void dumpToStream(raw_ostream &os) const;
-};
+ return std::nullopt;
}
// TODO: once the constraint manager is smart enough to handle non simplified
@@ -86,8 +120,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
APSIntType(extent.getValue()).convert(SIE->getRHS());
switch (SIE->getOpcode()) {
case BO_Mul:
- // The constant should never be 0 here, since it the result of scaling
- // based on the size of a type which is never 0.
+ // The constant should never be 0 here, becasue multiplication by zero
+ // is simplified by the engine.
if ((extent.getValue() % constant) != 0)
return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
else
@@ -163,16 +197,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
return;
ProgramStateRef state = checkerContext.getState();
-
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
- const std::optional<RegionRawOffsetV2> &RawOffset =
- RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+
+ const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
+ computeOffset(state, svalBuilder, location);
if (!RawOffset)
return;
- NonLoc ByteOffset = RawOffset->getByteOffset();
- const SubRegion *Reg = RawOffset->getRegion();
+ auto [Reg, ByteOffset] = *RawOffset;
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace();
@@ -207,12 +240,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
// We know that the index definitely exceeds the upper bound.
- reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
return;
}
if (isTainted(state, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
- reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
+ reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
+ ByteOffset);
return;
}
}
@@ -224,58 +258,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
checkerContext.addTransition(state);
}
-void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
- ProgramStateRef errorState,
- SVal TaintedSVal) const {
- ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
- if (!errorNode)
- return;
-
- if (!TaintBT)
- TaintBT.reset(
- new BugType(this, "Out-of-bound access", categories::TaintedData));
-
- SmallString<256> buf;
- llvm::raw_svector_ostream os(buf);
- os << "Out of bound memory access (index is tainted)";
- auto BR =
- std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
-
- // Track back the propagation of taintedness.
- for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
- BR->markInteresting(Sym);
- }
-
- checkerContext.emitReport(std::move(BR));
-}
-
-void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
- ProgramStateRef errorState,
- OOB_Kind kind) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
+ ProgramStateRef ErrorState, OOB_Kind Kind,
+ SVal TaintedSVal) const {
- ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
- if (!errorNode)
+ ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
+ if (!ErrorNode)
return;
- if (!BT)
- BT.reset(new BugType(this, "Out-of-bound access"));
+ // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
+ // a follow-up commit.
- // FIXME: This diagnostics are preliminary. We should get far better
- // diagnostics for explaining buffer overruns.
+ SmallString<128> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Out of bound memory access ";
- SmallString<256> buf;
- llvm::raw_svector_ostream os(buf);
- os << "Out of bound memory access ";
- switch (kind) {
+ switch (Kind) {
case OOB_Precedes:
- os << "(accessed memory precedes memory block)";
+ Out << "(accessed memory precedes memory block)";
+ break;
+ case OOB_Exceeds:
+ Out << "(access exceeds upper limit of memory block)";
break;
- case OOB_Excedes:
- os << "(access exceeds upper limit of memory block)";
+ case OOB_Taint:
+ Out << "(index is tainted)";
break;
}
- auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
- checkerContext.emitReport(std::move(BR));
+ auto BR = std::make_unique<PathSensitiveBugReport>(
+ Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
+ // Track back the propagation of taintedness, or do nothing if TaintedSVal is
+ // the default UnknownVal().
+ for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
+ BR->markInteresting(Sym);
+ }
+ C.emitReport(std::move(BR));
}
bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
@@ -297,67 +313,6 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
(MacroName == "isupper") || (MacroName == "isxdigit"));
}
-#ifndef NDEBUG
-LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
- dumpToStream(llvm::errs());
-}
-
-void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
- os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
-}
-#endif
-
-/// For a given Location that can be represented as a symbolic expression
-/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
-/// Arr and the distance of Location from the beginning of Arr (expressed in a
-/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
-/// cannot be determined.
-std::optional<RegionRawOffsetV2>
-RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
- SVal Location) {
- QualType T = SVB.getArrayIndexType();
- auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
- // We will use this utility to add and multiply values.
- return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
- };
-
- const MemRegion *Region = Location.getAsRegion();
- NonLoc Offset = SVB.makeZeroArrayIndex();
-
- while (Region) {
- if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
- if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
- QualType ElemType = ERegion->getElementType();
- // If the element is an incomplete type, go no further.
- if (ElemType->isIncompleteType())
- return std::nullopt;
-
- // Perform Offset += Index * sizeof(ElemType); then continue the offset
- // calculations with SuperRegion:
- NonLoc Size = SVB.makeArrayIndex(
- SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
- if (auto Delta = Calc(BO_Mul, *Index, Size)) {
- if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
- Offset = *NewOffset;
- Region = ERegion->getSuperRegion();
- continue;
- }
- }
- }
- } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
- // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
- // to see a MemSpaceRegion at this point.
- // FIXME: We may return with {<Region>, 0} even if we didn't handle any
- // ElementRegion layers. I think that this behavior was introduced
- // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
- // it may be useful to review it in the future.
- return RegionRawOffsetV2(SRegion, Offset);
- }
- return std::nullopt;
- }
- return std::nullopt;
-}
-
void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundCheckerV2>();
}
More information about the cfe-commits
mailing list