[clang] [llvm] [NFC][analyzer] Extract bounds checking library (PR #202372)
DonĂ¡t Nagy via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 8 08:51:04 PDT 2026
https://github.com/NagyDonat created https://github.com/llvm/llvm-project/pull/202372
This change refactors `ArrayBoundChecker.cpp` and extracts parts of its logic into a separate library `BoundsChecking.{cpp,h}`.
In its current shape this "library" is only useful for `ArrayBoundChecker.cpp` (e.g. its message generation utilities always describe array access), but in follow-up commits I will generalize it and turn it into a backend that will be used in the various bounds checking checkers.
For earlier discussion see https://discourse.llvm.org/t/uniformization-of-bounds-checking/88975
------------------
Note for reviewers: This is a large PRs, but its individual steps are separated into 15 commits, it may be useful to follow those during the review.
My primary plan is to squash and merge this PR as a single commit on main (because I think that would be better in the commit history), but I'm open to keeping some parts separate if you would prefer that.
>From 8c23f5f8063c0f49fa3c90f06d6b69f7219f395f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 3 Jun 2026 13:31:30 +0200
Subject: [PATCH 01/15] Only take the name of the region in StateUpdateReporter
constructor
It does not need to know about the region object.
---
.../StaticAnalyzer/Checkers/ArrayBoundChecker.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 67110f021bc56..6b12fa6717fbf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -77,8 +77,7 @@ determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
}
class StateUpdateReporter {
- const MemSpaceRegion *Space;
- const SubRegion *Reg;
+ const std::string RegName;
const NonLoc ByteOffsetVal;
const std::optional<QualType> ElementType;
const std::optional<int64_t> ElementSize;
@@ -86,10 +85,10 @@ class StateUpdateReporter {
std::optional<NonLoc> AssumedUpperBound = std::nullopt;
public:
- StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E,
+ StateUpdateReporter(std::string RN, NonLoc ByteOffsVal, const Expr *E,
CheckerContext &C)
- : Space(R->getMemorySpace(C.getState())), Reg(R),
- ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E, C)),
+ : RegName(RN), ByteOffsetVal(ByteOffsVal),
+ ElementType(determineElementType(E, C)),
ElementSize(determineElementSize(ElementType, C)) {}
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
@@ -550,7 +549,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
<< "' elements in ";
else
Out << "the extent of ";
- Out << getRegionName(Space, Reg);
+ Out << RegName;
}
return std::string(Out.str());
}
@@ -597,7 +596,8 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
// The state updates will be reported as a single note tag, which will be
// composed by this helper class.
- StateUpdateReporter SUR(Reg, ByteOffset, E, C);
+ std::string RegName = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
+ StateUpdateReporter SUR(RegName, ByteOffset, E, C);
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace(State);
>From 2f961f8d3e136dfb8627827dd4aa9030e980ef01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 3 Jun 2026 14:52:33 +0200
Subject: [PATCH 02/15] Move note creation out of StateUpdateReporter
I'm gradually transforming this class into a generic utility, taking
away some responsibilities and then adding other responsibilities.
---
.../Checkers/ArrayBoundChecker.cpp | 46 +++++++++----------
1 file changed, 22 insertions(+), 24 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 6b12fa6717fbf..750d79993c5b7 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -77,7 +77,6 @@ determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
}
class StateUpdateReporter {
- const std::string RegName;
const NonLoc ByteOffsetVal;
const std::optional<QualType> ElementType;
const std::optional<int64_t> ElementSize;
@@ -85,10 +84,8 @@ class StateUpdateReporter {
std::optional<NonLoc> AssumedUpperBound = std::nullopt;
public:
- StateUpdateReporter(std::string RN, NonLoc ByteOffsVal, const Expr *E,
- CheckerContext &C)
- : RegName(RN), ByteOffsetVal(ByteOffsVal),
- ElementType(determineElementType(E, C)),
+ StateUpdateReporter(NonLoc ByteOffsVal, const Expr *E, CheckerContext &C)
+ : ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E, C)),
ElementSize(determineElementSize(ElementType, C)) {}
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
@@ -98,11 +95,11 @@ class StateUpdateReporter {
bool assumedNonNegative() { return AssumedNonNegative; }
- const NoteTag *createNoteTag(CheckerContext &C) const;
+ bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
-private:
- std::string getMessage(PathSensitiveBugReport &BR) const;
+ std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName) const;
+private:
/// Return true if information about the value of `Sym` can put constraints
/// on some symbol which is interesting within the bug report `BR`.
/// In particular, this returns true when `Sym` is interesting within `BR`;
@@ -488,17 +485,8 @@ static Messages getTaintMsgs(const MemSpaceRegion *Space,
AlsoMentionUnderflow ? "negative or " : "")};
}
-const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const {
- // Don't create a note tag if we didn't assume anything:
- if (!AssumedNonNegative && !AssumedUpperBound)
- return nullptr;
-
- return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string {
- return getMessage(BR);
- });
-}
-
-std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
+std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
+ StringRef RegName) const {
bool ShouldReportNonNegative = AssumedNonNegative;
if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
if (AssumedUpperBound &&
@@ -596,8 +584,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
// The state updates will be reported as a single note tag, which will be
// composed by this helper class.
- std::string RegName = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
- StateUpdateReporter SUR(RegName, ByteOffset, E, C);
+ StateUpdateReporter SUR(ByteOffset, E, C);
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace(State);
@@ -683,8 +670,9 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
// expression that calculates the past-the-end pointer.
if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
*KnownSize, C)) {
- C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C));
- return;
+ // The use of 'goto' is a temporary solution, will be eliminated in
+ // the next steps of the refactoring.
+ goto NormalTransition;
}
BadOffsetKind Problem = AlsoMentionUnderflow
@@ -726,8 +714,18 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
State = WithinUpperBound;
}
+NormalTransition:
// Add a transition, reporting the state updates that we accumulated.
- C.addTransition(State, SUR.createNoteTag(C));
+ const NoteTag *Tag = nullptr;
+
+ if (SUR.hasAssumption()) {
+ std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
+ Tag = C.getNoteTag([SUR, RN](PathSensitiveBugReport &BR) -> std::string {
+ return SUR.getMessage(BR, RN);
+ });
+ }
+
+ C.addTransition(State, Tag);
}
void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
>From 48158251c347a472a484720e3fd693f563e54374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 3 Jun 2026 14:58:52 +0200
Subject: [PATCH 03/15] Place small helpers before StateUpdateReporter
Because they will be used in the generalized version of
StateUpdateReporter.
---
.../Checkers/ArrayBoundChecker.cpp | 36 +++++++++----------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 750d79993c5b7..9843824bc2ebf 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -76,6 +76,24 @@ determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
}
+struct Messages {
+ std::string Short, Full;
+};
+
+enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
+
+constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
+ "a negative or overflowing"};
+static StringRef asAdjective(BadOffsetKind Problem) {
+ return Adjectives[static_cast<int>(Problem)];
+}
+
+constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
+ "around"};
+static StringRef asPreposition(BadOffsetKind Problem) {
+ return Prepositions[static_cast<int>(Problem)];
+}
+
class StateUpdateReporter {
const NonLoc ByteOffsetVal;
const std::optional<QualType> ElementType;
@@ -122,24 +140,6 @@ class StateUpdateReporter {
}
};
-struct Messages {
- std::string Short, Full;
-};
-
-enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
-
-constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
- "a negative or overflowing"};
-static StringRef asAdjective(BadOffsetKind Problem) {
- return Adjectives[static_cast<int>(Problem)];
-}
-
-constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
- "around"};
-static StringRef asPreposition(BadOffsetKind Problem) {
- return Prepositions[static_cast<int>(Problem)];
-}
-
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
// expression to `CheckerContext::getSVal()` which only works after the
>From 6ed1ad457236440ad824beb78e4a1199d9958e02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 3 Jun 2026 15:31:48 +0200
Subject: [PATCH 04/15] Introduce the helper class SizeUnit
This is currently used for the reporting of assumptions, but in
follow-up commits I will use it to unify the reporting of assumptions
and overflow reports.
---
.../Checkers/ArrayBoundChecker.cpp | 81 +++++++++++--------
1 file changed, 46 insertions(+), 35 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 9843824bc2ebf..1c4c86fc8aa92 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -57,24 +57,41 @@ getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
return ASE;
}
-/// If `E` is a "clean" array subscript expression, return the type of the
-/// accessed element; otherwise return std::nullopt because that's the best (or
-/// least bad) option for the diagnostic generation that relies on this.
-static std::optional<QualType> determineElementType(const Expr *E,
- const CheckerContext &C) {
- const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
- if (!ASE)
- return std::nullopt;
+class SizeUnit {
+ QualType AsType;
+ int64_t AsCharUnits;
- return ASE->getType();
-}
+ SizeUnit() : AsType(), AsCharUnits(1) {}
-static std::optional<int64_t>
-determineElementSize(const std::optional<QualType> T, const CheckerContext &C) {
- if (!T)
- return std::nullopt;
- return C.getASTContext().getTypeSizeInChars(*T).getQuantity();
-}
+public:
+ SizeUnit(QualType T, const ASTContext &ACtx)
+ : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
+ assert(!T.isNull());
+ }
+
+ static SizeUnit bytes() { return SizeUnit(); }
+
+ bool isBytes() const { return AsType.isNull(); }
+
+ /// If `E` is a "clean" array subscript expression, return the type of the
+ /// accessed element; otherwise return 'Bytes' because that's the best (or
+ /// least bad) option for the diagnostic generation that relies on this.
+ static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+ const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+ if (!ASE)
+ return bytes();
+
+ return SizeUnit(ASE->getType(), C.getASTContext());
+ }
+
+ int64_t asCharUnits() const { return AsCharUnits; }
+
+ std::string asExtentDesc(bool ForceBytes) const {
+ if (ForceBytes || isBytes())
+ return "the extent of";
+ return formatv("the number of '{0}' elements in", AsType.getAsString());
+ }
+};
struct Messages {
std::string Short, Full;
@@ -96,15 +113,11 @@ static StringRef asPreposition(BadOffsetKind Problem) {
class StateUpdateReporter {
const NonLoc ByteOffsetVal;
- const std::optional<QualType> ElementType;
- const std::optional<int64_t> ElementSize;
bool AssumedNonNegative = false;
std::optional<NonLoc> AssumedUpperBound = std::nullopt;
public:
- StateUpdateReporter(NonLoc ByteOffsVal, const Expr *E, CheckerContext &C)
- : ByteOffsetVal(ByteOffsVal), ElementType(determineElementType(E, C)),
- ElementSize(determineElementSize(ElementType, C)) {}
+ StateUpdateReporter(NonLoc ByteOffsVal) : ByteOffsetVal(ByteOffsVal) {}
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
@@ -115,7 +128,8 @@ class StateUpdateReporter {
bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
- std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName) const;
+ std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName,
+ SizeUnit SU) const;
private:
/// Return true if information about the value of `Sym` can put constraints
@@ -486,7 +500,8 @@ static Messages getTaintMsgs(const MemSpaceRegion *Space,
}
std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
- StringRef RegName) const {
+ StringRef RegName,
+ SizeUnit SU) const {
bool ShouldReportNonNegative = AssumedNonNegative;
if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
if (AssumedUpperBound &&
@@ -505,7 +520,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
const bool UseIndex =
- ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize);
+ !SU.isBytes() && tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
@@ -532,12 +547,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
Out << " less than ";
if (ExtentN)
Out << *ExtentN << ", ";
- if (UseIndex && ElementType)
- Out << "the number of '" << ElementType->getAsString()
- << "' elements in ";
- else
- Out << "the extent of ";
- Out << RegName;
+ Out << SU.asExtentDesc(/*ForceBytes=*/!UseIndex) << ' ' << RegName;
}
return std::string(Out.str());
}
@@ -584,7 +594,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
// The state updates will be reported as a single note tag, which will be
// composed by this helper class.
- StateUpdateReporter SUR(ByteOffset, E, C);
+ StateUpdateReporter SUR(ByteOffset);
// CHECK LOWER BOUND
const MemSpaceRegion *Space = Reg->getMemorySpace(State);
@@ -716,16 +726,17 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
NormalTransition:
// Add a transition, reporting the state updates that we accumulated.
- const NoteTag *Tag = nullptr;
+ const NoteTag *T = nullptr;
if (SUR.hasAssumption()) {
std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
- Tag = C.getNoteTag([SUR, RN](PathSensitiveBugReport &BR) -> std::string {
- return SUR.getMessage(BR, RN);
+ SizeUnit SU = SizeUnit::forExpr(E, C);
+ T = C.getNoteTag([SUR, RN, SU](PathSensitiveBugReport &BR) -> std::string {
+ return SUR.getMessage(BR, RN, SU);
});
}
- C.addTransition(State, Tag);
+ C.addTransition(State, T);
}
void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
>From 5031aa4ac1a6b298ba55c50e0b596b944fee08ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Thu, 4 Jun 2026 11:37:33 +0200
Subject: [PATCH 05/15] Rename 'performCheck' to 'handleAccessExpr'
---
.../lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 1c4c86fc8aa92..be667550aabc5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -167,7 +167,7 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
BugType BT{this, "Out-of-bound access"};
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
- void performCheck(const Expr *E, CheckerContext &C) const;
+ void handleAccessExpr(const Expr *E, CheckerContext &C) const;
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
NonLoc Offset, std::optional<NonLoc> Extent,
@@ -188,15 +188,15 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
public:
void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const {
- performCheck(E, C);
+ handleAccessExpr(E, C);
}
void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const {
if (E->getOpcode() == UO_Deref)
- performCheck(E, C);
+ handleAccessExpr(E, C);
}
void checkPostStmt(const MemberExpr *E, CheckerContext &C) const {
if (E->isArrow())
- performCheck(E->getBase(), C);
+ handleAccessExpr(E->getBase(), C);
}
};
@@ -570,7 +570,8 @@ bool StateUpdateReporter::providesInformationAboutInteresting(
return false;
}
-void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const {
+void ArrayBoundChecker::handleAccessExpr(const Expr *E,
+ CheckerContext &C) const {
const SVal Location = C.getSVal(E);
// The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
>From 5fb3516b1d48ca1f369dd1e118bd3c0a17d2d74c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Thu, 4 Jun 2026 20:28:26 +0200
Subject: [PATCH 06/15] Separate determining bounds and performing the bounds
check
Follow-up changes will reduce the complexity and move functions to more
natural places.
---
.../Checkers/ArrayBoundChecker.cpp | 268 +++++++++++-------
1 file changed, 158 insertions(+), 110 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index be667550aabc5..9f9fabab815c1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -111,23 +111,51 @@ static StringRef asPreposition(BadOffsetKind Problem) {
return Prepositions[static_cast<int>(Problem)];
}
-class StateUpdateReporter {
- const NonLoc ByteOffsetVal;
+class BoundsCheckResult {
+public:
+ enum class Kind { Underflow, Overflow, TaintBug, Paradox, Valid };
+
+private:
+ Kind K = Kind::Valid;
bool AssumedNonNegative = false;
- std::optional<NonLoc> AssumedUpperBound = std::nullopt;
+ bool AssumedUpperBound = false;
+ const NonLoc Offset;
+ std::optional<NonLoc> Extent;
+ ProgramStateRef State = nullptr;
public:
- StateUpdateReporter(NonLoc ByteOffsVal) : ByteOffsetVal(ByteOffsVal) {}
+ BoundsCheckResult(NonLoc Offs, std::optional<NonLoc> E)
+ : Offset(Offs), Extent(E) {}
void recordNonNegativeAssumption() { AssumedNonNegative = true; }
- void recordUpperBoundAssumption(NonLoc UpperBoundVal) {
- AssumedUpperBound = UpperBoundVal;
+
+ void recordUpperBoundAssumption() { AssumedUpperBound = true; }
+
+ void finalize(Kind K_, ProgramStateRef S) {
+ K = K_;
+ State = S;
}
- bool assumedNonNegative() { return AssumedNonNegative; }
+ bool assumedNonNegative() const { return AssumedNonNegative; }
bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
+ ProgramStateRef getState() const { return State; }
+
+ Kind getKind() const { return K; }
+
+ std::optional<BadOffsetKind> getBadOffsetKind() const {
+ switch (K) {
+ case Kind::Underflow:
+ return BadOffsetKind::Negative;
+ case Kind::Overflow:
+ return assumedNonNegative() ? BadOffsetKind::Indeterminate
+ : BadOffsetKind::Overflowing;
+ default:
+ return std::nullopt;
+ }
+ }
+
std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName,
SizeUnit SU) const;
@@ -154,6 +182,12 @@ class StateUpdateReporter {
}
};
+struct CheckFlags {
+ bool CheckUnderflow;
+ bool OffsetObviouslyNonnegative;
+ bool AcceptPastTheEnd;
+};
+
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
// expression to `CheckerContext::getSVal()` which only works after the
@@ -168,6 +202,9 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
void handleAccessExpr(const Expr *E, CheckerContext &C) const;
+ BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+ NonLoc Offset, std::optional<NonLoc> Extent,
+ CheckFlags Flags) const;
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
NonLoc Offset, std::optional<NonLoc> Extent,
@@ -181,9 +218,6 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
static bool isOffsetObviouslyNonnegative(const Expr *E, CheckerContext &C);
- static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State,
- NonLoc Offset, NonLoc Limit,
- CheckerContext &C);
static bool isInAddressOf(const Stmt *S, ASTContext &AC);
public:
@@ -444,6 +478,9 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx,
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
+ if (Problem == BadOffsetKind::Negative)
+ ExtentN = std::nullopt;
+
int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
@@ -499,13 +536,12 @@ static Messages getTaintMsgs(const MemSpaceRegion *Space,
AlsoMentionUnderflow ? "negative or " : "")};
}
-std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
- StringRef RegName,
- SizeUnit SU) const {
+std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
+ StringRef RegName,
+ SizeUnit SU) const {
bool ShouldReportNonNegative = AssumedNonNegative;
- if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) {
- if (AssumedUpperBound &&
- providesInformationAboutInteresting(*AssumedUpperBound, BR)) {
+ if (!providesInformationAboutInteresting(Offset, BR)) {
+ if (AssumedUpperBound && providesInformationAboutInteresting(*Extent, BR)) {
// Even if the byte offset isn't interesting (e.g. it's a constant value),
// the assumption can still be interesting if it provides information
// about an interesting symbolic upper bound.
@@ -516,8 +552,8 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
}
}
- std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal);
- std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound);
+ std::optional<int64_t> OffsetN = getConcreteValue(Offset);
+ std::optional<int64_t> ExtentN = getConcreteValue(Extent);
const bool UseIndex =
!SU.isBytes() && tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
@@ -529,7 +565,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
Out << "index ";
if (OffsetN)
Out << "'" << OffsetN << "' ";
- } else if (AssumedUpperBound) {
+ } else if (Extent) {
Out << "byte offset ";
if (OffsetN)
Out << "'" << OffsetN << "' ";
@@ -541,7 +577,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
if (ShouldReportNonNegative) {
Out << " non-negative";
}
- if (AssumedUpperBound) {
+ if (Extent) {
if (ShouldReportNonNegative)
Out << " and";
Out << " less than ";
@@ -552,7 +588,7 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR,
return std::string(Out.str());
}
-bool StateUpdateReporter::providesInformationAboutInteresting(
+bool BoundsCheckResult::providesInformationAboutInteresting(
SymbolRef Sym, PathSensitiveBugReport &BR) {
if (!Sym)
return false;
@@ -593,27 +629,89 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
auto [Reg, ByteOffset] = *RawOffset;
- // The state updates will be reported as a single note tag, which will be
- // composed by this helper class.
- StateUpdateReporter SUR(ByteOffset);
+ const MemSpaceRegion *Space = Reg->getMemorySpace(State);
+ auto Extent = getDynamicExtent(State, Reg, SVB).getAs<NonLoc>();
+
+ // A symbolic region in unknown space represents an unknown pointer that
+ // may point into the middle of an array, so we don't look for underflows.
+ // Both conditions are significant because we want to check underflows in
+ // symbolic regions on the heap (which may be introduced by checkers like
+ // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
+ // non-symbolic regions (e.g. a field subregion of a symbolic region) in
+ // unknown space.
+
+ CheckFlags Flags = {
+ /*CheckUnderflow=*/!(isa<SymbolicRegion>(Reg) &&
+ isa<UnknownSpaceRegion>(Space)),
+ /*OffsetObviouslyNonnegative=*/isOffsetObviouslyNonnegative(E, C),
+ /*AcceptPastTheEnd=*/isa<ArraySubscriptExpr>(E) &&
+ isInAddressOf(E, C.getASTContext()),
+ };
+
+ BoundsCheckResult Res = checkBounds(State, SVB, ByteOffset, Extent, Flags);
+
+ switch (Res.getKind()) {
+ case BoundsCheckResult::Kind::Paradox:
+ // The current state is paradoxical (due to bad modeling of casts we
+ // assumed that an unsigned value is negative), so we should sink the
+ // execution path.
+ C.addSink();
+ return;
+
+ case BoundsCheckResult::Kind::Valid: {
+ const NoteTag *Tag = nullptr;
+ if (Res.hasAssumption()) {
+ std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
+ SizeUnit SU = SizeUnit::forExpr(E, C);
+ Tag = C.getNoteTag(
+ [Res, RN, SU](PathSensitiveBugReport &BR) -> std::string {
+ return Res.getMessage(BR, RN, SU);
+ });
+ }
+
+ C.addTransition(Res.getState(), Tag);
+ return;
+ }
+ case BoundsCheckResult::Kind::TaintBug: {
+ // Diagnostic detail: saying "tainted offset" is always correct, but
+ // the common case is that 'idx' is tainted in 'arr[idx]' and then it's
+ // nicer to say "tainted index".
+ const char *OffsetName = "offset";
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
+ if (isTainted(State, ASE->getIdx(), C.getStackFrame()))
+ OffsetName = "index";
+
+ Messages Msgs =
+ getTaintMsgs(Space, Reg, OffsetName,
+ /*AlsoMentionUnderflow=*/Res.assumedNonNegative());
+ reportOOB(C, Res.getState(), Msgs, ByteOffset, Extent,
+ /*IsTaintBug=*/true);
+ return;
+ }
+ default: {
+ Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
+ Extent, Location, *Res.getBadOffsetKind());
+ reportOOB(C, Res.getState(), Msgs, ByteOffset, Extent);
+ return;
+ }
+ }
+}
+
+BoundsCheckResult ArrayBoundChecker::checkBounds(ProgramStateRef State,
+ SValBuilder &SVB,
+ NonLoc Offset,
+ std::optional<NonLoc> Extent,
+ CheckFlags Flags) const {
+ BoundsCheckResult Res(Offset, Extent);
// CHECK LOWER BOUND
- const MemSpaceRegion *Space = Reg->getMemorySpace(State);
- if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
- // A symbolic region in unknown space represents an unknown pointer that
- // may point into the middle of an array, so we don't look for underflows.
- // Both conditions are significant because we want to check underflows in
- // symbolic regions on the heap (which may be introduced by checkers like
- // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
- // non-symbolic regions (e.g. a field subregion of a symbolic region) in
- // unknown space.
- auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
- State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);
+ if (Flags.CheckUnderflow) {
+ auto [PrecedesLowerBound, WithinLowerBound] =
+ compareValueToThreshold(State, Offset, SVB.makeZeroArrayIndex(), SVB);
if (PrecedesLowerBound) {
// The analyzer thinks that the offset may be invalid (negative)...
-
- if (isOffsetObviouslyNonnegative(E, C)) {
+ if (Flags.OffsetObviouslyNonnegative) {
// ...but the offset is obviously non-negative (clear array subscript
// with an unsigned index), so we're in a buggy situation.
@@ -632,8 +730,8 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
if (!WithinLowerBound) {
// The state is completely nonsense -- let's just sink it!
- C.addSink();
- return;
+ Res.finalize(BoundsCheckResult::Kind::Paradox, PrecedesLowerBound);
+ return Res;
}
// Otherwise continue on the 'WithinLowerBound' branch where the
// unsigned index _is_ non-negative. Don't mention this assumption as a
@@ -641,15 +739,12 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
} else {
if (!WithinLowerBound) {
// ...and it cannot be valid (>= 0), so report an error.
- Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg,
- ByteOffset, /*Extent=*/std::nullopt,
- Location, BadOffsetKind::Negative);
- reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt);
- return;
+ Res.finalize(BoundsCheckResult::Kind::Underflow, PrecedesLowerBound);
+ return Res;
}
// ...but it can be valid as well, so the checker will (optimistically)
// assume that it's valid and mention this in the note tag.
- SUR.recordNonNegativeAssumption();
+ Res.recordNonNegativeAssumption();
}
}
@@ -661,17 +756,9 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
}
// CHECK UPPER BOUND
- DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
- if (auto KnownSize = Size.getAs<NonLoc>()) {
- // In a situation where both underflow and overflow are possible (but the
- // index is either tainted or known to be invalid), the logic of this
- // checker will first assume that the offset is non-negative, and then
- // (with this additional assumption) it will detect an overflow error.
- // In this situation the warning message should mention both possibilities.
- bool AlsoMentionUnderflow = SUR.assumedNonNegative();
-
+ if (Extent) {
auto [WithinUpperBound, ExceedsUpperBound] =
- compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);
+ compareValueToThreshold(State, Offset, *Extent, SVB);
if (ExceedsUpperBound) {
// The offset may be invalid (>= Size)...
@@ -679,43 +766,28 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
// ...and it cannot be within bounds, so report an error, unless we can
// definitely determine that this is an idiomatic `&array[size]`
// expression that calculates the past-the-end pointer.
- if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset,
- *KnownSize, C)) {
- // The use of 'goto' is a temporary solution, will be eliminated in
- // the next steps of the refactoring.
- goto NormalTransition;
+ if (Flags.AcceptPastTheEnd) {
+ auto [EqualsToThreshold, NotEqualToThreshold] =
+ compareValueToThreshold(State, Offset, *Extent, SVB,
+ /*CheckEquality=*/true);
+ if (EqualsToThreshold && !NotEqualToThreshold) {
+ Res.finalize(BoundsCheckResult::Kind::Valid, State);
+ return Res;
+ }
}
- BadOffsetKind Problem = AlsoMentionUnderflow
- ? BadOffsetKind::Indeterminate
- : BadOffsetKind::Overflowing;
- Messages Msgs =
- getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
- *KnownSize, Location, Problem);
- reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize);
- return;
+ Res.finalize(BoundsCheckResult::Kind::Overflow, ExceedsUpperBound);
+ return Res;
}
// ...and it can be valid as well...
- if (isTainted(State, ByteOffset)) {
+ if (isTainted(State, Offset)) {
// ...but it's tainted, so report an error.
-
- // Diagnostic detail: saying "tainted offset" is always correct, but
- // the common case is that 'idx' is tainted in 'arr[idx]' and then it's
- // nicer to say "tainted index".
- const char *OffsetName = "offset";
- if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
- if (isTainted(State, ASE->getIdx(), C.getStackFrame()))
- OffsetName = "index";
-
- Messages Msgs =
- getTaintMsgs(Space, Reg, OffsetName, AlsoMentionUnderflow);
- reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize,
- /*IsTaintBug=*/true);
- return;
+ Res.finalize(BoundsCheckResult::Kind::TaintBug, State);
+ return Res;
}
// ...and it isn't tainted, so the checker will (optimistically) assume
// that the offset is in bounds and mention this in the note tag.
- SUR.recordUpperBoundAssumption(*KnownSize);
+ Res.recordUpperBoundAssumption();
}
// Actually update the state. The "if" only fails in the extremely unlikely
@@ -724,20 +796,8 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
if (WithinUpperBound)
State = WithinUpperBound;
}
-
-NormalTransition:
- // Add a transition, reporting the state updates that we accumulated.
- const NoteTag *T = nullptr;
-
- if (SUR.hasAssumption()) {
- std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
- SizeUnit SU = SizeUnit::forExpr(E, C);
- T = C.getNoteTag([SUR, RN, SU](PathSensitiveBugReport &BR) -> std::string {
- return SUR.getMessage(BR, RN, SU);
- });
- }
-
- C.addTransition(State, T);
+ Res.finalize(BoundsCheckResult::Kind::Valid, State);
+ return Res;
}
void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
@@ -834,18 +894,6 @@ bool ArrayBoundChecker::isInAddressOf(const Stmt *S, ASTContext &ACtx) {
return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf;
}
-bool ArrayBoundChecker::isIdiomaticPastTheEndPtr(const Expr *E,
- ProgramStateRef State,
- NonLoc Offset, NonLoc Limit,
- CheckerContext &C) {
- if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) {
- auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold(
- State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true);
- return EqualsToThreshold && !NotEqualToThreshold;
- }
- return false;
-}
-
void ento::registerArrayBoundChecker(CheckerManager &mgr) {
mgr.registerChecker<ArrayBoundChecker>();
}
>From bcbaf55ea162b05c152a5a29dd349f118ed33e92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Fri, 5 Jun 2026 12:11:19 +0200
Subject: [PATCH 07/15] Move 'checkBounds' out of 'ArrayBoundChecker'
..to prepare for moving it to a separate source file.
Additionally, this way I can make `checkBounds` a friend of
`BoundsCheckResult` and hide the methods of `BoundsCheckResult` that
allow creation of "incomplete" (not yet `finalize`d) `BoundsCheckResult`
objects (e.g. the constructor).
---
.../Checkers/ArrayBoundChecker.cpp | 34 ++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 9f9fabab815c1..9007b1cc12d32 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -111,6 +111,12 @@ static StringRef asPreposition(BadOffsetKind Problem) {
return Prepositions[static_cast<int>(Problem)];
}
+struct CheckFlags {
+ bool CheckUnderflow;
+ bool OffsetObviouslyNonnegative;
+ bool AcceptPastTheEnd;
+};
+
class BoundsCheckResult {
public:
enum class Kind { Underflow, Overflow, TaintBug, Paradox, Valid };
@@ -123,7 +129,6 @@ class BoundsCheckResult {
std::optional<NonLoc> Extent;
ProgramStateRef State = nullptr;
-public:
BoundsCheckResult(NonLoc Offs, std::optional<NonLoc> E)
: Offset(Offs), Extent(E) {}
@@ -136,6 +141,13 @@ class BoundsCheckResult {
State = S;
}
+public:
+ friend BoundsCheckResult checkBounds(ProgramStateRef State,
+ SValBuilder &SVB,
+ NonLoc Offset,
+ std::optional<NonLoc> Extent,
+ CheckFlags Flags);
+
bool assumedNonNegative() const { return AssumedNonNegative; }
bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
@@ -182,11 +194,11 @@ class BoundsCheckResult {
}
};
-struct CheckFlags {
- bool CheckUnderflow;
- bool OffsetObviouslyNonnegative;
- bool AcceptPastTheEnd;
-};
+BoundsCheckResult checkBounds(ProgramStateRef State,
+ SValBuilder &SVB,
+ NonLoc Offset,
+ std::optional<NonLoc> Extent,
+ CheckFlags Flags);
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
@@ -202,9 +214,6 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
void handleAccessExpr(const Expr *E, CheckerContext &C) const;
- BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
- NonLoc Offset, std::optional<NonLoc> Extent,
- CheckFlags Flags) const;
void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs,
NonLoc Offset, std::optional<NonLoc> Extent,
@@ -235,7 +244,6 @@ class ArrayBoundChecker : public Checker<check::PostStmt<ArraySubscriptExpr>,
};
} // anonymous namespace
-
/// For a given Location that can be represented as a symbolic expression
/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
/// Arr and the distance of Location from the beginning of Arr (expressed in a
@@ -697,11 +705,12 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
}
}
-BoundsCheckResult ArrayBoundChecker::checkBounds(ProgramStateRef State,
+namespace {
+BoundsCheckResult checkBounds(ProgramStateRef State,
SValBuilder &SVB,
NonLoc Offset,
std::optional<NonLoc> Extent,
- CheckFlags Flags) const {
+ CheckFlags Flags) {
BoundsCheckResult Res(Offset, Extent);
// CHECK LOWER BOUND
@@ -799,6 +808,7 @@ BoundsCheckResult ArrayBoundChecker::checkBounds(ProgramStateRef State,
Res.finalize(BoundsCheckResult::Kind::Valid, State);
return Res;
}
+} // anonymous namespace
void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
ProgramStateRef ErrorState,
>From 7a077d1f81b0f228e05f65bfad480b8d2fa85040 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Fri, 5 Jun 2026 12:45:34 +0200
Subject: [PATCH 08/15] Unify 'getRegionName' calls
---
.../StaticAnalyzer/Checkers/ArrayBoundChecker.cpp | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 9007b1cc12d32..33875ad5ddeb1 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -474,11 +474,9 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
}
static Messages getNonTaintMsgs(const ASTContext &ACtx,
- const MemSpaceRegion *Space,
- const SubRegion *Region, NonLoc Offset,
+ std::string RegName, NonLoc Offset,
std::optional<NonLoc> Extent, SVal Location,
BadOffsetKind Problem) {
- std::string RegName = getRegionName(Space, Region);
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
QualType ElemType = EReg->getElementType();
@@ -533,10 +531,8 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx,
std::string(Buf)};
}
-static Messages getTaintMsgs(const MemSpaceRegion *Space,
- const SubRegion *Region, const char *OffsetName,
+static Messages getTaintMsgs(std::string RegName, const char *OffsetName,
bool AlsoMentionUnderflow) {
- std::string RegName = getRegionName(Space, Region);
return {formatv("Potential out of bound access to {0} with tainted {1}",
RegName, OffsetName),
formatv("Access of {0} with a tainted {1} that may be {2}too large",
@@ -658,6 +654,8 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
BoundsCheckResult Res = checkBounds(State, SVB, ByteOffset, Extent, Flags);
+ std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
+
switch (Res.getKind()) {
case BoundsCheckResult::Kind::Paradox:
// The current state is paradoxical (due to bad modeling of casts we
@@ -669,7 +667,6 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
case BoundsCheckResult::Kind::Valid: {
const NoteTag *Tag = nullptr;
if (Res.hasAssumption()) {
- std::string RN = getRegionName(Reg->getMemorySpace(C.getState()), Reg);
SizeUnit SU = SizeUnit::forExpr(E, C);
Tag = C.getNoteTag(
[Res, RN, SU](PathSensitiveBugReport &BR) -> std::string {
@@ -690,14 +687,14 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
OffsetName = "index";
Messages Msgs =
- getTaintMsgs(Space, Reg, OffsetName,
+ getTaintMsgs(RN, OffsetName,
/*AlsoMentionUnderflow=*/Res.assumedNonNegative());
reportOOB(C, Res.getState(), Msgs, ByteOffset, Extent,
/*IsTaintBug=*/true);
return;
}
default: {
- Messages Msgs = getNonTaintMsgs(C.getASTContext(), Space, Reg, ByteOffset,
+ Messages Msgs = getNonTaintMsgs(C.getASTContext(), RN, ByteOffset,
Extent, Location, *Res.getBadOffsetKind());
reportOOB(C, Res.getState(), Msgs, ByteOffset, Extent);
return;
>From 416f31ad0f50800b57024c29201cdd8610382af2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Fri, 5 Jun 2026 13:08:07 +0200
Subject: [PATCH 09/15] Use 'SizeUnit' in 'getNonTaintMsgs'
This methods duplicates lots of logic that also appears in the method
'BoundsCheckResult::getMessage' (which composes the assumption
messages), so move towards unifying them.
---
.../Checkers/ArrayBoundChecker.cpp | 43 +++++++++++--------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 33875ad5ddeb1..622ffafe18e64 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -75,7 +75,7 @@ class SizeUnit {
/// If `E` is a "clean" array subscript expression, return the type of the
/// accessed element; otherwise return 'Bytes' because that's the best (or
- /// least bad) option for the diagnostic generation that relies on this.
+ /// least bad) option for the assumption messages that use this.
static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
if (!ASE)
@@ -84,6 +84,16 @@ class SizeUnit {
return SizeUnit(ASE->getType(), C.getASTContext());
}
+ /// Return the element type that is "natural" for reporting out-of-bounds
+ /// memory access to 'Location'.
+ /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+ /// used for reporting assumption (`SizeUnit::forExpr`).
+ static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+ const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+ assert(EReg && "this checker only handles element access");
+ return SizeUnit(EReg->getElementType(), ACtx);
+ }
+
int64_t asCharUnits() const { return AsCharUnits; }
std::string asExtentDesc(bool ForceBytes) const {
@@ -91,6 +101,12 @@ class SizeUnit {
return "the extent of";
return formatv("the number of '{0}' elements in", AsType.getAsString());
}
+
+ std::string asElementName(bool ForceBytes) const {
+ if (ForceBytes || isBytes())
+ return "byte";
+ return formatv("'{0}' element", AsType.getAsString());
+ }
};
struct Messages {
@@ -473,13 +489,9 @@ static bool tryDividePair(std::optional<int64_t> &Val1,
return true;
}
-static Messages getNonTaintMsgs(const ASTContext &ACtx,
- std::string RegName, NonLoc Offset,
- std::optional<NonLoc> Extent, SVal Location,
+static Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
+ std::optional<NonLoc> Extent,
BadOffsetKind Problem) {
- const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
- assert(EReg && "this checker only handles element access");
- QualType ElemType = EReg->getElementType();
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
@@ -487,9 +499,7 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx,
if (Problem == BadOffsetKind::Negative)
ExtentN = std::nullopt;
- int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity();
-
- bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize);
+ bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
SmallString<256> Buf;
@@ -501,7 +511,7 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx,
// natural to mention the element type later where the extent is described,
// but if the extent is unknown/irrelevant, then the element type can be
// inserted into the message at this point.
- Out << "'" << ElemType.getAsString() << "' element in ";
+ Out << SU.asElementName(/*ForceBytes=*/false) << " in ";
}
Out << RegName << " at ";
if (OffsetN) {
@@ -517,10 +527,8 @@ static Messages getNonTaintMsgs(const ASTContext &ACtx,
Out << *ExtentN;
else
Out << "a single";
- if (UseByteOffsets)
- Out << " byte";
- else
- Out << " '" << ElemType.getAsString() << "' element";
+
+ Out << ' ' << SU.asElementName(/*ForceBytes=*/UseByteOffsets);
if (*ExtentN > 1)
Out << "s";
@@ -694,8 +702,9 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
return;
}
default: {
- Messages Msgs = getNonTaintMsgs(C.getASTContext(), RN, ByteOffset,
- Extent, Location, *Res.getBadOffsetKind());
+ SizeUnit SU = SizeUnit::forSVal(Location, C.getASTContext());
+ Messages Msgs =
+ getNonTaintMsgs(RN, SU, ByteOffset, Extent, *Res.getBadOffsetKind());
reportOOB(C, Res.getState(), Msgs, ByteOffset, Extent);
return;
}
>From 92b237d458c3194fbbb87e201caaea6f335e7189 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Fri, 5 Jun 2026 14:42:32 +0200
Subject: [PATCH 10/15] Extract BoundsChecking library to separate files
The code is moved to the new location with only very few necessary
changes (removing 'static' from some functions).
It will be improved with further commits.
---
.../StaticAnalyzer/Checkers/BoundsChecking.h | 205 ++++++
.../Checkers/ArrayBoundChecker.cpp | 599 +-----------------
.../Checkers/BoundsChecking.cpp | 447 +++++++++++++
.../StaticAnalyzer/Checkers/CMakeLists.txt | 1 +
.../lib/StaticAnalyzer/Checkers/BUILD.gn | 1 +
5 files changed, 655 insertions(+), 598 deletions(-)
create mode 100644 clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
create mode 100644 clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
new file mode 100644
index 0000000000000..8aebe63556182
--- /dev/null
+++ b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
@@ -0,0 +1,205 @@
+//===- BoundsChecking.h - Bounds checking related APIs ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for performing a bounds check (i.e. comparing a
+// symbolic Offset value to zero and a symbolic Extent value) and composing
+// descriptions that explain its results.
+//
+// This is intended as a replacement for `ProgramState::assumeInBound` to
+// avoid its incorrect logic and compensate for deficiencies of other parts of
+// the analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <optional>
+
+using llvm::formatv;
+
+namespace clang {
+namespace ento {
+
+/// If `E` is an array subscript expression with a base that is "clean" (= not
+/// modified by pointer arithmetic = the beginning of a memory region), return
+/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
+/// This helper function is used by two separate heuristics that are only valid
+/// in these "clean" cases.
+const ArraySubscriptExpr *getAsCleanArraySubscriptExpr(const Expr *E,
+ const CheckerContext &C);
+
+class SizeUnit {
+ QualType AsType;
+ int64_t AsCharUnits;
+
+ SizeUnit() : AsType(), AsCharUnits(1) {}
+
+public:
+ SizeUnit(QualType T, const ASTContext &ACtx)
+ : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
+ assert(!T.isNull());
+ }
+
+ static SizeUnit bytes() { return SizeUnit(); }
+
+ bool isBytes() const { return AsType.isNull(); }
+
+ /// If `E` is a "clean" array subscript expression, return the type of the
+ /// accessed element; otherwise return 'Bytes' because that's the best (or
+ /// least bad) option for the assumption messages that use this.
+ static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+ const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+ if (!ASE)
+ return bytes();
+
+ return SizeUnit(ASE->getType(), C.getASTContext());
+ }
+
+ /// Return the element type that is "natural" for reporting out-of-bounds
+ /// memory access to 'Location'.
+ /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+ /// used for reporting assumption (`SizeUnit::forExpr`).
+ static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+ const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
+ assert(EReg && "this checker only handles element access");
+ return SizeUnit(EReg->getElementType(), ACtx);
+ }
+
+ int64_t asCharUnits() const { return AsCharUnits; }
+
+ std::string asExtentDesc(bool ForceBytes) const {
+ if (ForceBytes || isBytes())
+ return "the extent of";
+ return formatv("the number of '{0}' elements in", AsType.getAsString());
+ }
+
+ std::string asElementName(bool ForceBytes) const {
+ if (ForceBytes || isBytes())
+ return "byte";
+ return formatv("'{0}' element", AsType.getAsString());
+ }
+};
+
+struct Messages {
+ std::string Short, Full;
+};
+
+enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
+
+constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
+ "a negative or overflowing"};
+inline StringRef asAdjective(BadOffsetKind Problem) {
+ return Adjectives[static_cast<int>(Problem)];
+}
+
+constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
+ "around"};
+inline StringRef asPreposition(BadOffsetKind Problem) {
+ return Prepositions[static_cast<int>(Problem)];
+}
+
+struct CheckFlags {
+ bool CheckUnderflow;
+ bool OffsetObviouslyNonnegative;
+ bool AcceptPastTheEnd;
+};
+
+class BoundsCheckResult {
+public:
+ enum class Kind { Underflow, Overflow, TaintBug, Paradox, Valid };
+
+private:
+ Kind K = Kind::Valid;
+ bool AssumedNonNegative = false;
+ bool AssumedUpperBound = false;
+ const NonLoc Offset;
+ std::optional<NonLoc> Extent;
+ ProgramStateRef State = nullptr;
+
+ BoundsCheckResult(NonLoc Offs, std::optional<NonLoc> E)
+ : Offset(Offs), Extent(E) {}
+
+ void recordNonNegativeAssumption() { AssumedNonNegative = true; }
+
+ void recordUpperBoundAssumption() { AssumedUpperBound = true; }
+
+ void finalize(Kind K_, ProgramStateRef S) {
+ K = K_;
+ State = S;
+ }
+
+public:
+ friend BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+ NonLoc Offset,
+ std::optional<NonLoc> Extent,
+ CheckFlags Flags);
+
+ bool assumedNonNegative() const { return AssumedNonNegative; }
+
+ bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
+
+ ProgramStateRef getState() const { return State; }
+
+ Kind getKind() const { return K; }
+
+ std::optional<BadOffsetKind> getBadOffsetKind() const {
+ switch (K) {
+ case Kind::Underflow:
+ return BadOffsetKind::Negative;
+ case Kind::Overflow:
+ return assumedNonNegative() ? BadOffsetKind::Indeterminate
+ : BadOffsetKind::Overflowing;
+ default:
+ return std::nullopt;
+ }
+ }
+
+ std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName,
+ SizeUnit SU) const;
+
+private:
+ /// Return true if information about the value of `Sym` can put constraints
+ /// on some symbol which is interesting within the bug report `BR`.
+ /// In particular, this returns true when `Sym` is interesting within `BR`;
+ /// but it also returns true if `Sym` is an expression that contains integer
+ /// constants and a single symbolic operand which is interesting (in `BR`).
+ /// We need to use this instead of plain `BR.isInteresting()` because if we
+ /// are analyzing code like
+ /// int array[10];
+ /// int f(int arg) {
+ /// return array[arg] && array[arg + 10];
+ /// }
+ /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
+ /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
+ /// to detect this out of bounds access).
+ static bool providesInformationAboutInteresting(SymbolRef Sym,
+ PathSensitiveBugReport &BR);
+ static bool providesInformationAboutInteresting(SVal SV,
+ PathSensitiveBugReport &BR) {
+ return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
+ }
+};
+
+BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+ NonLoc Offset, std::optional<NonLoc> Extent,
+ CheckFlags Flags);
+
+std::string getRegionName(const MemSpaceRegion *Space, const SubRegion *Region);
+
+Messages getTaintMsgs(std::string RegName, const char *OffsetName,
+ bool AlsoMentionUnderflow);
+
+Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
+ std::optional<NonLoc> Extent, BadOffsetKind Problem);
+
+} // namespace ento
+} // namespace clang
+
+#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index 622ffafe18e64..a3b68854bef5c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -11,211 +11,21 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/AST/CharUnits.h"
#include "clang/AST/ParentMapContext.h"
+#include "clang/StaticAnalyzer/Checkers/BoundsChecking.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
-#include "llvm/ADT/APSInt.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
#include <optional>
using namespace clang;
using namespace ento;
using namespace taint;
-using llvm::formatv;
namespace {
-/// If `E` is an array subscript expression with a base that is "clean" (= not
-/// modified by pointer arithmetic = the beginning of a memory region), return
-/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
-/// This helper function is used by two separate heuristics that are only valid
-/// in these "clean" cases.
-static const ArraySubscriptExpr *
-getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
- const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
- if (!ASE)
- return nullptr;
-
- const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
- if (!SubscriptBaseReg)
- return nullptr;
-
- // The base of the subscript expression is affected by pointer arithmetics,
- // so we want to report byte offsets instead of indices and we don't want to
- // activate the "index is unsigned -> cannot be negative" shortcut.
- if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
- return nullptr;
-
- return ASE;
-}
-
-class SizeUnit {
- QualType AsType;
- int64_t AsCharUnits;
-
- SizeUnit() : AsType(), AsCharUnits(1) {}
-
-public:
- SizeUnit(QualType T, const ASTContext &ACtx)
- : AsType(T), AsCharUnits(ACtx.getTypeSizeInChars(T).getQuantity()) {
- assert(!T.isNull());
- }
-
- static SizeUnit bytes() { return SizeUnit(); }
-
- bool isBytes() const { return AsType.isNull(); }
-
- /// If `E` is a "clean" array subscript expression, return the type of the
- /// accessed element; otherwise return 'Bytes' because that's the best (or
- /// least bad) option for the assumption messages that use this.
- static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
- const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
- if (!ASE)
- return bytes();
-
- return SizeUnit(ASE->getType(), C.getASTContext());
- }
-
- /// Return the element type that is "natural" for reporting out-of-bounds
- /// memory access to 'Location'.
- /// FIXME: It is unfortunate that this heuristic differs from the heuristic
- /// used for reporting assumption (`SizeUnit::forExpr`).
- static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
- const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
- assert(EReg && "this checker only handles element access");
- return SizeUnit(EReg->getElementType(), ACtx);
- }
-
- int64_t asCharUnits() const { return AsCharUnits; }
-
- std::string asExtentDesc(bool ForceBytes) const {
- if (ForceBytes || isBytes())
- return "the extent of";
- return formatv("the number of '{0}' elements in", AsType.getAsString());
- }
-
- std::string asElementName(bool ForceBytes) const {
- if (ForceBytes || isBytes())
- return "byte";
- return formatv("'{0}' element", AsType.getAsString());
- }
-};
-
-struct Messages {
- std::string Short, Full;
-};
-
-enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
-
-constexpr llvm::StringLiteral Adjectives[] = {"a negative", "an overflowing",
- "a negative or overflowing"};
-static StringRef asAdjective(BadOffsetKind Problem) {
- return Adjectives[static_cast<int>(Problem)];
-}
-
-constexpr llvm::StringLiteral Prepositions[] = {"preceding", "after the end of",
- "around"};
-static StringRef asPreposition(BadOffsetKind Problem) {
- return Prepositions[static_cast<int>(Problem)];
-}
-
-struct CheckFlags {
- bool CheckUnderflow;
- bool OffsetObviouslyNonnegative;
- bool AcceptPastTheEnd;
-};
-
-class BoundsCheckResult {
-public:
- enum class Kind { Underflow, Overflow, TaintBug, Paradox, Valid };
-
-private:
- Kind K = Kind::Valid;
- bool AssumedNonNegative = false;
- bool AssumedUpperBound = false;
- const NonLoc Offset;
- std::optional<NonLoc> Extent;
- ProgramStateRef State = nullptr;
-
- BoundsCheckResult(NonLoc Offs, std::optional<NonLoc> E)
- : Offset(Offs), Extent(E) {}
-
- void recordNonNegativeAssumption() { AssumedNonNegative = true; }
-
- void recordUpperBoundAssumption() { AssumedUpperBound = true; }
-
- void finalize(Kind K_, ProgramStateRef S) {
- K = K_;
- State = S;
- }
-
-public:
- friend BoundsCheckResult checkBounds(ProgramStateRef State,
- SValBuilder &SVB,
- NonLoc Offset,
- std::optional<NonLoc> Extent,
- CheckFlags Flags);
-
- bool assumedNonNegative() const { return AssumedNonNegative; }
-
- bool hasAssumption() const { return AssumedNonNegative || AssumedUpperBound; }
-
- ProgramStateRef getState() const { return State; }
-
- Kind getKind() const { return K; }
-
- std::optional<BadOffsetKind> getBadOffsetKind() const {
- switch (K) {
- case Kind::Underflow:
- return BadOffsetKind::Negative;
- case Kind::Overflow:
- return assumedNonNegative() ? BadOffsetKind::Indeterminate
- : BadOffsetKind::Overflowing;
- default:
- return std::nullopt;
- }
- }
-
- std::string getMessage(PathSensitiveBugReport &BR, StringRef RegName,
- SizeUnit SU) const;
-
-private:
- /// Return true if information about the value of `Sym` can put constraints
- /// on some symbol which is interesting within the bug report `BR`.
- /// In particular, this returns true when `Sym` is interesting within `BR`;
- /// but it also returns true if `Sym` is an expression that contains integer
- /// constants and a single symbolic operand which is interesting (in `BR`).
- /// We need to use this instead of plain `BR.isInteresting()` because if we
- /// are analyzing code like
- /// int array[10];
- /// int f(int arg) {
- /// return array[arg] && array[arg + 10];
- /// }
- /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
- /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
- /// to detect this out of bounds access).
- static bool providesInformationAboutInteresting(SymbolRef Sym,
- PathSensitiveBugReport &BR);
- static bool providesInformationAboutInteresting(SVal SV,
- PathSensitiveBugReport &BR) {
- return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
- }
-};
-
-BoundsCheckResult checkBounds(ProgramStateRef State,
- SValBuilder &SVB,
- NonLoc Offset,
- std::optional<NonLoc> Extent,
- CheckFlags Flags);
-
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
// instead of `PreStmt` because the current implementation passes the whole
// expression to `CheckerContext::getSVal()` which only works after the
@@ -316,308 +126,6 @@ computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
return std::nullopt;
}
-// NOTE: This function is the "heart" of this checker. It simplifies
-// inequalities with transformations that are valid (and very elementary) in
-// pure mathematics, but become invalid if we use them in C++ number model
-// where the calculations may overflow.
-// Due to the overflow issues I think it's impossible (or at least not
-// practical) to integrate this kind of simplification into the resolution of
-// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
-// produces valid results when the calculations are handling memory offsets
-// and every value is well below SIZE_MAX.
-// TODO: This algorithm should be moved to a central location where it's
-// available for other checkers that need to compare memory offsets.
-// NOTE: the simplification preserves the order of the two operands in a
-// mathematical sense, but it may change the result produced by a C++
-// comparison operator (and the automatic type conversions).
-// For example, consider a comparison "X+1 < 0", where the LHS is stored as a
-// size_t and the RHS is stored in an int. (As size_t is unsigned, this
-// comparison is false for all values of "X".) However, the simplification may
-// turn it into "X < -1", which is still always false in a mathematical sense,
-// but can produce a true result when evaluated by `evalBinOp` (which follows
-// the rules of C++ and casts -1 to SIZE_MAX).
-static std::pair<NonLoc, nonloc::ConcreteInt>
-getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
- SValBuilder &svalBuilder) {
- const llvm::APSInt &extentVal = extent.getValue();
- std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
- if (SymVal && SymVal->isExpression()) {
- if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
- llvm::APSInt constant = APSIntType(extentVal).convert(SIE->getRHS());
- switch (SIE->getOpcode()) {
- case BO_Mul:
- // The constant should never be 0 here, becasue multiplication by zero
- // is simplified by the engine.
- if ((extentVal % constant) != 0)
- return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
- else
- return getSimplifiedOffsets(
- nonloc::SymbolVal(SIE->getLHS()),
- svalBuilder.makeIntVal(extentVal / constant), svalBuilder);
- case BO_Add:
- return getSimplifiedOffsets(
- nonloc::SymbolVal(SIE->getLHS()),
- svalBuilder.makeIntVal(extentVal - constant), svalBuilder);
- default:
- break;
- }
- }
- }
-
- return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
-}
-
-static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
- const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value);
- return MaxV && MaxV->isNegative();
-}
-
-static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {
- QualType T = Value.getType(SVB.getContext());
- return T->isUnsignedIntegerType();
-}
-
-// Evaluate the comparison Value < Threshold with the help of the custom
-// simplification algorithm defined for this checker. Return a pair of states,
-// where the first one corresponds to "value below threshold" and the second
-// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
-// the case when the evaluation fails.
-// If the optional argument CheckEquality is true, then use BO_EQ instead of
-// the default BO_LT after consistently applying the same simplification steps.
-static std::pair<ProgramStateRef, ProgramStateRef>
-compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
- SValBuilder &SVB, bool CheckEquality = false) {
- if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
- std::tie(Value, Threshold) =
- getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
- }
-
- // We want to perform a _mathematical_ comparison between the numbers `Value`
- // and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may
- // perform automatic conversions. For example the number -1 is less than the
- // number 1000, but -1 < `1000ull` will evaluate to `false` because the `int`
- // -1 is converted to ULONGLONG_MAX.
- // To avoid automatic conversions, we evaluate the "obvious" cases without
- // calling `evalBinOpNN`:
- if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {
- if (CheckEquality) {
- // negative_value == unsigned_threshold is always false
- return {nullptr, State};
- }
- // negative_value < unsigned_threshold is always true
- return {State, nullptr};
- }
- if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {
- // unsigned_value == negative_threshold and
- // unsigned_value < negative_threshold are both always false
- return {nullptr, State};
- }
- // FIXME: These special cases are sufficient for handling real-world
- // comparisons, but in theory there could be contrived situations where
- // automatic conversion of a symbolic value (which can be negative and can be
- // positive) leads to incorrect results.
- // NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because
- // we want to ensure that assumptions coming from this precondition and
- // assumptions coming from regular C/C++ operator calls are represented by
- // constraints on the same symbolic expression. A solution that would
- // evaluate these "mathematical" comparisons through a separate pathway would
- // be a step backwards in this sense.
-
- const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
- auto BelowThreshold =
- SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
- .getAs<NonLoc>();
-
- if (BelowThreshold)
- return State->assume(*BelowThreshold);
-
- return {nullptr, nullptr};
-}
-
-static std::string getRegionName(const MemSpaceRegion *Space,
- const SubRegion *Region) {
- if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
- return RegName;
-
- // Field regions only have descriptive names when their parent has a
- // descriptive name; so we provide a fallback representation for them:
- if (const auto *FR = Region->getAs<FieldRegion>()) {
- if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
- return formatv("the field '{0}'", Name);
- return "the unnamed field";
- }
-
- if (isa<AllocaRegion>(Region))
- return "the memory returned by 'alloca'";
-
- if (isa<SymbolicRegion>(Region) && isa<HeapSpaceRegion>(Space))
- return "the heap area";
-
- if (isa<StringRegion>(Region))
- return "the string literal";
-
- return "the region";
-}
-
-static std::optional<int64_t> getConcreteValue(NonLoc SV) {
- if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
- return ConcreteVal->getValue()->tryExtValue();
- }
- return std::nullopt;
-}
-
-static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
- return SV ? getConcreteValue(*SV) : std::nullopt;
-}
-
-/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
-/// it can be performed (`Divisor` is nonzero and there is no remainder). The
-/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
-/// division is considered to be successful.
-static bool tryDividePair(std::optional<int64_t> &Val1,
- std::optional<int64_t> &Val2, int64_t Divisor) {
- if (!Divisor)
- return false;
- const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
- const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
- if (Val1HasRemainder || Val2HasRemainder)
- return false;
- if (Val1)
- *Val1 /= Divisor;
- if (Val2)
- *Val2 /= Divisor;
- return true;
-}
-
-static Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
- std::optional<NonLoc> Extent,
- BadOffsetKind Problem) {
-
- std::optional<int64_t> OffsetN = getConcreteValue(Offset);
- std::optional<int64_t> ExtentN = getConcreteValue(Extent);
-
- if (Problem == BadOffsetKind::Negative)
- ExtentN = std::nullopt;
-
- bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
- const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
-
- SmallString<256> Buf;
- llvm::raw_svector_ostream Out(Buf);
- Out << "Access of ";
- if (OffsetN && !ExtentN && !UseByteOffsets) {
- // If the offset is reported as an index, then the report must mention the
- // element type (because it is not always clear from the code). It's more
- // natural to mention the element type later where the extent is described,
- // but if the extent is unknown/irrelevant, then the element type can be
- // inserted into the message at this point.
- Out << SU.asElementName(/*ForceBytes=*/false) << " in ";
- }
- Out << RegName << " at ";
- if (OffsetN) {
- if (Problem == BadOffsetKind::Negative)
- Out << "negative ";
- Out << OffsetOrIndex << " " << *OffsetN;
- } else {
- Out << asAdjective(Problem) << " " << OffsetOrIndex;
- }
- if (ExtentN) {
- Out << ", while it holds only ";
- if (*ExtentN != 1)
- Out << *ExtentN;
- else
- Out << "a single";
-
- Out << ' ' << SU.asElementName(/*ForceBytes=*/UseByteOffsets);
-
- if (*ExtentN > 1)
- Out << "s";
- }
-
- return {formatv("Out of bound access to memory {0} {1}",
- asPreposition(Problem), RegName),
- std::string(Buf)};
-}
-
-static Messages getTaintMsgs(std::string RegName, const char *OffsetName,
- bool AlsoMentionUnderflow) {
- return {formatv("Potential out of bound access to {0} with tainted {1}",
- RegName, OffsetName),
- formatv("Access of {0} with a tainted {1} that may be {2}too large",
- RegName, OffsetName,
- AlsoMentionUnderflow ? "negative or " : "")};
-}
-
-std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
- StringRef RegName,
- SizeUnit SU) const {
- bool ShouldReportNonNegative = AssumedNonNegative;
- if (!providesInformationAboutInteresting(Offset, BR)) {
- if (AssumedUpperBound && providesInformationAboutInteresting(*Extent, BR)) {
- // Even if the byte offset isn't interesting (e.g. it's a constant value),
- // the assumption can still be interesting if it provides information
- // about an interesting symbolic upper bound.
- ShouldReportNonNegative = false;
- } else {
- // We don't have anything interesting, don't report the assumption.
- return "";
- }
- }
-
- std::optional<int64_t> OffsetN = getConcreteValue(Offset);
- std::optional<int64_t> ExtentN = getConcreteValue(Extent);
-
- const bool UseIndex =
- !SU.isBytes() && tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
-
- SmallString<256> Buf;
- llvm::raw_svector_ostream Out(Buf);
- Out << "Assuming ";
- if (UseIndex) {
- Out << "index ";
- if (OffsetN)
- Out << "'" << OffsetN << "' ";
- } else if (Extent) {
- Out << "byte offset ";
- if (OffsetN)
- Out << "'" << OffsetN << "' ";
- } else {
- Out << "offset ";
- }
-
- Out << "is";
- if (ShouldReportNonNegative) {
- Out << " non-negative";
- }
- if (Extent) {
- if (ShouldReportNonNegative)
- Out << " and";
- Out << " less than ";
- if (ExtentN)
- Out << *ExtentN << ", ";
- Out << SU.asExtentDesc(/*ForceBytes=*/!UseIndex) << ' ' << RegName;
- }
- return std::string(Out.str());
-}
-
-bool BoundsCheckResult::providesInformationAboutInteresting(
- SymbolRef Sym, PathSensitiveBugReport &BR) {
- if (!Sym)
- return false;
- for (SymbolRef PartSym : Sym->symbols()) {
- // The interestingess mark may appear on any layer as we're stripping off
- // the SymIntExpr, UnarySymExpr etc. layers...
- if (BR.isInteresting(PartSym))
- return true;
- // ...but if both sides of the expression are symbolic, then there is no
- // practical algorithm to produce separate constraints for the two
- // operands (from the single combined result).
- if (isa<SymSymExpr>(PartSym))
- return false;
- }
- return false;
-}
-
void ArrayBoundChecker::handleAccessExpr(const Expr *E,
CheckerContext &C) const {
const SVal Location = C.getSVal(E);
@@ -711,111 +219,6 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
}
}
-namespace {
-BoundsCheckResult checkBounds(ProgramStateRef State,
- SValBuilder &SVB,
- NonLoc Offset,
- std::optional<NonLoc> Extent,
- CheckFlags Flags) {
- BoundsCheckResult Res(Offset, Extent);
-
- // CHECK LOWER BOUND
- if (Flags.CheckUnderflow) {
- auto [PrecedesLowerBound, WithinLowerBound] =
- compareValueToThreshold(State, Offset, SVB.makeZeroArrayIndex(), SVB);
-
- if (PrecedesLowerBound) {
- // The analyzer thinks that the offset may be invalid (negative)...
- if (Flags.OffsetObviouslyNonnegative) {
- // ...but the offset is obviously non-negative (clear array subscript
- // with an unsigned index), so we're in a buggy situation.
-
- // TODO: Currently the analyzer ignores many casts (e.g. signed ->
- // unsigned casts), so it can easily reach states where it will load a
- // signed (and negative) value from an unsigned variable. This sanity
- // check is a duct tape "solution" that silences most of the ugly false
- // positives that are caused by this buggy behavior. Note that this is
- // not a complete solution: this cannot silence reports where pointer
- // arithmetic complicates the picture and cannot ensure modeling of the
- // "unsigned index is positive with highest bit set" cases which are
- // "usurped" by the nonsense "unsigned index is negative" case.
- // For more information about this topic, see the umbrella ticket
- // https://github.com/llvm/llvm-project/issues/39492
- // TODO: Remove this hack once 'SymbolCast's are modeled properly.
-
- if (!WithinLowerBound) {
- // The state is completely nonsense -- let's just sink it!
- Res.finalize(BoundsCheckResult::Kind::Paradox, PrecedesLowerBound);
- return Res;
- }
- // Otherwise continue on the 'WithinLowerBound' branch where the
- // unsigned index _is_ non-negative. Don't mention this assumption as a
- // note tag, because it would just confuse the users!
- } else {
- if (!WithinLowerBound) {
- // ...and it cannot be valid (>= 0), so report an error.
- Res.finalize(BoundsCheckResult::Kind::Underflow, PrecedesLowerBound);
- return Res;
- }
- // ...but it can be valid as well, so the checker will (optimistically)
- // assume that it's valid and mention this in the note tag.
- Res.recordNonNegativeAssumption();
- }
- }
-
- // Actually update the state. The "if" only fails in the extremely unlikely
- // case when compareValueToThreshold returns {nullptr, nullptr} because
- // evalBinOpNN fails to evaluate the less-than operator.
- if (WithinLowerBound)
- State = WithinLowerBound;
- }
-
- // CHECK UPPER BOUND
- if (Extent) {
- auto [WithinUpperBound, ExceedsUpperBound] =
- compareValueToThreshold(State, Offset, *Extent, SVB);
-
- if (ExceedsUpperBound) {
- // The offset may be invalid (>= Size)...
- if (!WithinUpperBound) {
- // ...and it cannot be within bounds, so report an error, unless we can
- // definitely determine that this is an idiomatic `&array[size]`
- // expression that calculates the past-the-end pointer.
- if (Flags.AcceptPastTheEnd) {
- auto [EqualsToThreshold, NotEqualToThreshold] =
- compareValueToThreshold(State, Offset, *Extent, SVB,
- /*CheckEquality=*/true);
- if (EqualsToThreshold && !NotEqualToThreshold) {
- Res.finalize(BoundsCheckResult::Kind::Valid, State);
- return Res;
- }
- }
-
- Res.finalize(BoundsCheckResult::Kind::Overflow, ExceedsUpperBound);
- return Res;
- }
- // ...and it can be valid as well...
- if (isTainted(State, Offset)) {
- // ...but it's tainted, so report an error.
- Res.finalize(BoundsCheckResult::Kind::TaintBug, State);
- return Res;
- }
- // ...and it isn't tainted, so the checker will (optimistically) assume
- // that the offset is in bounds and mention this in the note tag.
- Res.recordUpperBoundAssumption();
- }
-
- // Actually update the state. The "if" only fails in the extremely unlikely
- // case when compareValueToThreshold returns {nullptr, nullptr} because
- // evalBinOpNN fails to evaluate the less-than operator.
- if (WithinUpperBound)
- State = WithinUpperBound;
- }
- Res.finalize(BoundsCheckResult::Kind::Valid, State);
- return Res;
-}
-} // anonymous namespace
-
void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
ProgramStateRef ErrorState,
NonLoc Val, bool MarkTaint) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
new file mode 100644
index 0000000000000..dbdccf516517a
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
@@ -0,0 +1,447 @@
+//===- BoundsChecking.cpp - Bounds checking related APIs --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for performing a bounds check (i.e. comparing a
+// symbolic Offset value to zero and a symbolic Extent value) and composing
+// descriptions that explain its results.
+//
+// This is intended as a replacement for `ProgramState::assumeInBound` to
+// avoid its incorrect logic and compensate for deficiencies of other parts of
+// the analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BoundsChecking.h"
+#include "clang/StaticAnalyzer/Checkers/Taint.h"
+
+namespace clang {
+namespace ento {
+
+const ArraySubscriptExpr *
+getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
+ const auto *ASE = dyn_cast<ArraySubscriptExpr>(E);
+ if (!ASE)
+ return nullptr;
+
+ const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion();
+ if (!SubscriptBaseReg)
+ return nullptr;
+
+ // The base of the subscript expression is affected by pointer arithmetics,
+ // so we want to report byte offsets instead of indices and we don't want to
+ // activate the "index is unsigned -> cannot be negative" shortcut.
+ if (isa<ElementRegion>(SubscriptBaseReg->StripCasts()))
+ return nullptr;
+
+ return ASE;
+}
+
+// NOTE: This function is the "heart" of this checker. It simplifies
+// inequalities with transformations that are valid (and very elementary) in
+// pure mathematics, but become invalid if we use them in C++ number model
+// where the calculations may overflow.
+// Due to the overflow issues I think it's impossible (or at least not
+// practical) to integrate this kind of simplification into the resolution of
+// arbitrary inequalities (i.e. the code of `evalBinOp`); but this function
+// produces valid results when the calculations are handling memory offsets
+// and every value is well below SIZE_MAX.
+// TODO: This algorithm should be moved to a central location where it's
+// available for other checkers that need to compare memory offsets.
+// NOTE: the simplification preserves the order of the two operands in a
+// mathematical sense, but it may change the result produced by a C++
+// comparison operator (and the automatic type conversions).
+// For example, consider a comparison "X+1 < 0", where the LHS is stored as a
+// size_t and the RHS is stored in an int. (As size_t is unsigned, this
+// comparison is false for all values of "X".) However, the simplification may
+// turn it into "X < -1", which is still always false in a mathematical sense,
+// but can produce a true result when evaluated by `evalBinOp` (which follows
+// the rules of C++ and casts -1 to SIZE_MAX).
+static std::pair<NonLoc, nonloc::ConcreteInt>
+getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
+ SValBuilder &svalBuilder) {
+ const llvm::APSInt &extentVal = extent.getValue();
+ std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
+ if (SymVal && SymVal->isExpression()) {
+ if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
+ llvm::APSInt constant = APSIntType(extentVal).convert(SIE->getRHS());
+ switch (SIE->getOpcode()) {
+ case BO_Mul:
+ // The constant should never be 0 here, becasue multiplication by zero
+ // is simplified by the engine.
+ if ((extentVal % constant) != 0)
+ return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+ else
+ return getSimplifiedOffsets(
+ nonloc::SymbolVal(SIE->getLHS()),
+ svalBuilder.makeIntVal(extentVal / constant), svalBuilder);
+ case BO_Add:
+ return getSimplifiedOffsets(
+ nonloc::SymbolVal(SIE->getLHS()),
+ svalBuilder.makeIntVal(extentVal - constant), svalBuilder);
+ default:
+ break;
+ }
+ }
+ }
+
+ return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+}
+
+static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
+ const llvm::APSInt *MaxV = SVB.getMaxValue(State, Value);
+ return MaxV && MaxV->isNegative();
+}
+
+static bool isUnsigned(SValBuilder &SVB, NonLoc Value) {
+ QualType T = Value.getType(SVB.getContext());
+ return T->isUnsignedIntegerType();
+}
+
+// Evaluate the comparison Value < Threshold with the help of the custom
+// simplification algorithm defined for this checker. Return a pair of states,
+// where the first one corresponds to "value below threshold" and the second
+// corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
+// the case when the evaluation fails.
+// If the optional argument CheckEquality is true, then use BO_EQ instead of
+// the default BO_LT after consistently applying the same simplification steps.
+static std::pair<ProgramStateRef, ProgramStateRef>
+compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
+ SValBuilder &SVB, bool CheckEquality = false) {
+ if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
+ std::tie(Value, Threshold) =
+ getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
+ }
+
+ // We want to perform a _mathematical_ comparison between the numbers `Value`
+ // and `Threshold`; but `evalBinOpNN` evaluates a C/C++ operator that may
+ // perform automatic conversions. For example the number -1 is less than the
+ // number 1000, but -1 < `1000ull` will evaluate to `false` because the `int`
+ // -1 is converted to ULONGLONG_MAX.
+ // To avoid automatic conversions, we evaluate the "obvious" cases without
+ // calling `evalBinOpNN`:
+ if (isNegative(SVB, State, Value) && isUnsigned(SVB, Threshold)) {
+ if (CheckEquality) {
+ // negative_value == unsigned_threshold is always false
+ return {nullptr, State};
+ }
+ // negative_value < unsigned_threshold is always true
+ return {State, nullptr};
+ }
+ if (isUnsigned(SVB, Value) && isNegative(SVB, State, Threshold)) {
+ // unsigned_value == negative_threshold and
+ // unsigned_value < negative_threshold are both always false
+ return {nullptr, State};
+ }
+ // FIXME: These special cases are sufficient for handling real-world
+ // comparisons, but in theory there could be contrived situations where
+ // automatic conversion of a symbolic value (which can be negative and can be
+ // positive) leads to incorrect results.
+ // NOTE: We NEED to use the `evalBinOpNN` call in the "common" case, because
+ // we want to ensure that assumptions coming from this precondition and
+ // assumptions coming from regular C/C++ operator calls are represented by
+ // constraints on the same symbolic expression. A solution that would
+ // evaluate these "mathematical" comparisons through a separate pathway would
+ // be a step backwards in this sense.
+
+ const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT;
+ auto BelowThreshold =
+ SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType())
+ .getAs<NonLoc>();
+
+ if (BelowThreshold)
+ return State->assume(*BelowThreshold);
+
+ return {nullptr, nullptr};
+}
+
+std::string getRegionName(const MemSpaceRegion *Space,
+ const SubRegion *Region) {
+ if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
+ return RegName;
+
+ // Field regions only have descriptive names when their parent has a
+ // descriptive name; so we provide a fallback representation for them:
+ if (const auto *FR = Region->getAs<FieldRegion>()) {
+ if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
+ return formatv("the field '{0}'", Name);
+ return "the unnamed field";
+ }
+
+ if (isa<AllocaRegion>(Region))
+ return "the memory returned by 'alloca'";
+
+ if (isa<SymbolicRegion>(Region) && isa<HeapSpaceRegion>(Space))
+ return "the heap area";
+
+ if (isa<StringRegion>(Region))
+ return "the string literal";
+
+ return "the region";
+}
+
+static std::optional<int64_t> getConcreteValue(NonLoc SV) {
+ if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
+ return ConcreteVal->getValue()->tryExtValue();
+ }
+ return std::nullopt;
+}
+
+static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
+ return SV ? getConcreteValue(*SV) : std::nullopt;
+}
+
+/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
+/// it can be performed (`Divisor` is nonzero and there is no remainder). The
+/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
+/// division is considered to be successful.
+static bool tryDividePair(std::optional<int64_t> &Val1,
+ std::optional<int64_t> &Val2, int64_t Divisor) {
+ if (!Divisor)
+ return false;
+ const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
+ const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
+ if (Val1HasRemainder || Val2HasRemainder)
+ return false;
+ if (Val1)
+ *Val1 /= Divisor;
+ if (Val2)
+ *Val2 /= Divisor;
+ return true;
+}
+
+Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
+ std::optional<NonLoc> Extent, BadOffsetKind Problem) {
+
+ std::optional<int64_t> OffsetN = getConcreteValue(Offset);
+ std::optional<int64_t> ExtentN = getConcreteValue(Extent);
+
+ if (Problem == BadOffsetKind::Negative)
+ ExtentN = std::nullopt;
+
+ bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
+ const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
+
+ SmallString<256> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Access of ";
+ if (OffsetN && !ExtentN && !UseByteOffsets) {
+ // If the offset is reported as an index, then the report must mention the
+ // element type (because it is not always clear from the code). It's more
+ // natural to mention the element type later where the extent is described,
+ // but if the extent is unknown/irrelevant, then the element type can be
+ // inserted into the message at this point.
+ Out << SU.asElementName(/*ForceBytes=*/false) << " in ";
+ }
+ Out << RegName << " at ";
+ if (OffsetN) {
+ if (Problem == BadOffsetKind::Negative)
+ Out << "negative ";
+ Out << OffsetOrIndex << " " << *OffsetN;
+ } else {
+ Out << asAdjective(Problem) << " " << OffsetOrIndex;
+ }
+ if (ExtentN) {
+ Out << ", while it holds only ";
+ if (*ExtentN != 1)
+ Out << *ExtentN;
+ else
+ Out << "a single";
+
+ Out << ' ' << SU.asElementName(/*ForceBytes=*/UseByteOffsets);
+
+ if (*ExtentN > 1)
+ Out << "s";
+ }
+
+ return {formatv("Out of bound access to memory {0} {1}",
+ asPreposition(Problem), RegName),
+ std::string(Buf)};
+}
+
+Messages getTaintMsgs(std::string RegName, const char *OffsetName,
+ bool AlsoMentionUnderflow) {
+ return {formatv("Potential out of bound access to {0} with tainted {1}",
+ RegName, OffsetName),
+ formatv("Access of {0} with a tainted {1} that may be {2}too large",
+ RegName, OffsetName,
+ AlsoMentionUnderflow ? "negative or " : "")};
+}
+
+std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
+ StringRef RegName,
+ SizeUnit SU) const {
+ bool ShouldReportNonNegative = AssumedNonNegative;
+ if (!providesInformationAboutInteresting(Offset, BR)) {
+ if (AssumedUpperBound && providesInformationAboutInteresting(*Extent, BR)) {
+ // Even if the byte offset isn't interesting (e.g. it's a constant value),
+ // the assumption can still be interesting if it provides information
+ // about an interesting symbolic upper bound.
+ ShouldReportNonNegative = false;
+ } else {
+ // We don't have anything interesting, don't report the assumption.
+ return "";
+ }
+ }
+
+ std::optional<int64_t> OffsetN = getConcreteValue(Offset);
+ std::optional<int64_t> ExtentN = getConcreteValue(Extent);
+
+ const bool UseIndex =
+ !SU.isBytes() && tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
+
+ SmallString<256> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Assuming ";
+ if (UseIndex) {
+ Out << "index ";
+ if (OffsetN)
+ Out << "'" << OffsetN << "' ";
+ } else if (Extent) {
+ Out << "byte offset ";
+ if (OffsetN)
+ Out << "'" << OffsetN << "' ";
+ } else {
+ Out << "offset ";
+ }
+
+ Out << "is";
+ if (ShouldReportNonNegative) {
+ Out << " non-negative";
+ }
+ if (Extent) {
+ if (ShouldReportNonNegative)
+ Out << " and";
+ Out << " less than ";
+ if (ExtentN)
+ Out << *ExtentN << ", ";
+ Out << SU.asExtentDesc(/*ForceBytes=*/!UseIndex) << ' ' << RegName;
+ }
+ return std::string(Out.str());
+}
+
+bool BoundsCheckResult::providesInformationAboutInteresting(
+ SymbolRef Sym, PathSensitiveBugReport &BR) {
+ if (!Sym)
+ return false;
+ for (SymbolRef PartSym : Sym->symbols()) {
+ // The interestingess mark may appear on any layer as we're stripping off
+ // the SymIntExpr, UnarySymExpr etc. layers...
+ if (BR.isInteresting(PartSym))
+ return true;
+ // ...but if both sides of the expression are symbolic, then there is no
+ // practical algorithm to produce separate constraints for the two
+ // operands (from the single combined result).
+ if (isa<SymSymExpr>(PartSym))
+ return false;
+ }
+ return false;
+}
+
+BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
+ NonLoc Offset, std::optional<NonLoc> Extent,
+ CheckFlags Flags) {
+ BoundsCheckResult Res(Offset, Extent);
+
+ // CHECK LOWER BOUND
+ if (Flags.CheckUnderflow) {
+ auto [PrecedesLowerBound, WithinLowerBound] =
+ compareValueToThreshold(State, Offset, SVB.makeZeroArrayIndex(), SVB);
+
+ if (PrecedesLowerBound) {
+ // The analyzer thinks that the offset may be invalid (negative)...
+ if (Flags.OffsetObviouslyNonnegative) {
+ // ...but the offset is obviously non-negative (clear array subscript
+ // with an unsigned index), so we're in a buggy situation.
+
+ // TODO: Currently the analyzer ignores many casts (e.g. signed ->
+ // unsigned casts), so it can easily reach states where it will load a
+ // signed (and negative) value from an unsigned variable. This sanity
+ // check is a duct tape "solution" that silences most of the ugly false
+ // positives that are caused by this buggy behavior. Note that this is
+ // not a complete solution: this cannot silence reports where pointer
+ // arithmetic complicates the picture and cannot ensure modeling of the
+ // "unsigned index is positive with highest bit set" cases which are
+ // "usurped" by the nonsense "unsigned index is negative" case.
+ // For more information about this topic, see the umbrella ticket
+ // https://github.com/llvm/llvm-project/issues/39492
+ // TODO: Remove this hack once 'SymbolCast's are modeled properly.
+
+ if (!WithinLowerBound) {
+ // The state is completely nonsense -- let's just sink it!
+ Res.finalize(BoundsCheckResult::Kind::Paradox, PrecedesLowerBound);
+ return Res;
+ }
+ // Otherwise continue on the 'WithinLowerBound' branch where the
+ // unsigned index _is_ non-negative. Don't mention this assumption as a
+ // note tag, because it would just confuse the users!
+ } else {
+ if (!WithinLowerBound) {
+ // ...and it cannot be valid (>= 0), so report an error.
+ Res.finalize(BoundsCheckResult::Kind::Underflow, PrecedesLowerBound);
+ return Res;
+ }
+ // ...but it can be valid as well, so the checker will (optimistically)
+ // assume that it's valid and mention this in the note tag.
+ Res.recordNonNegativeAssumption();
+ }
+ }
+
+ // Actually update the state. The "if" only fails in the extremely unlikely
+ // case when compareValueToThreshold returns {nullptr, nullptr} because
+ // evalBinOpNN fails to evaluate the less-than operator.
+ if (WithinLowerBound)
+ State = WithinLowerBound;
+ }
+
+ // CHECK UPPER BOUND
+ if (Extent) {
+ auto [WithinUpperBound, ExceedsUpperBound] =
+ compareValueToThreshold(State, Offset, *Extent, SVB);
+
+ if (ExceedsUpperBound) {
+ // The offset may be invalid (>= Size)...
+ if (!WithinUpperBound) {
+ // ...and it cannot be within bounds, so report an error, unless we can
+ // definitely determine that this is an idiomatic `&array[size]`
+ // expression that calculates the past-the-end pointer.
+ if (Flags.AcceptPastTheEnd) {
+ auto [EqualsToThreshold, NotEqualToThreshold] =
+ compareValueToThreshold(State, Offset, *Extent, SVB,
+ /*CheckEquality=*/true);
+ if (EqualsToThreshold && !NotEqualToThreshold) {
+ Res.finalize(BoundsCheckResult::Kind::Valid, State);
+ return Res;
+ }
+ }
+
+ Res.finalize(BoundsCheckResult::Kind::Overflow, ExceedsUpperBound);
+ return Res;
+ }
+ // ...and it can be valid as well...
+ if (taint::isTainted(State, Offset)) {
+ // ...but it's tainted, so report an error.
+ Res.finalize(BoundsCheckResult::Kind::TaintBug, State);
+ return Res;
+ }
+ // ...and it isn't tainted, so the checker will (optimistically) assume
+ // that the offset is in bounds and mention this in the note tag.
+ Res.recordUpperBoundAssumption();
+ }
+
+ // Actually update the state. The "if" only fails in the extremely unlikely
+ // case when compareValueToThreshold returns {nullptr, nullptr} because
+ // evalBinOpNN fails to evaluate the less-than operator.
+ if (WithinUpperBound)
+ State = WithinUpperBound;
+ }
+ Res.finalize(BoundsCheckResult::Kind::Valid, State);
+ return Res;
+}
+
+} // namespace ento
+} // namespace clang
diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index 8a0621077b977..d2874f1758e14 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangStaticAnalyzerCheckers
BitwiseShiftChecker.cpp
BlockInCriticalSectionChecker.cpp
BoolAssignmentChecker.cpp
+ BoundsChecking.cpp
BuiltinFunctionChecker.cpp
CStringChecker.cpp
CStringSyntaxChecker.cpp
diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
index b3ac48975ee85..aa0dfc042e022 100644
--- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
@@ -22,6 +22,7 @@ static_library("Checkers") {
"BitwiseShiftChecker.cpp",
"BlockInCriticalSectionChecker.cpp",
"BoolAssignmentChecker.cpp",
+ "BoundsChecking.cpp",
"BuiltinFunctionChecker.cpp",
"CStringChecker.cpp",
"CStringSyntaxChecker.cpp",
>From f92d798102934dc827c8ad23ec767002f5c1bbfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 8 Jun 2026 16:42:27 +0200
Subject: [PATCH 11/15] Remove using declaration from header
---
.../include/clang/StaticAnalyzer/Checkers/BoundsChecking.h | 7 +++----
clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp | 2 ++
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
index 8aebe63556182..e873210cdaca2 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
+++ b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
@@ -22,8 +22,6 @@
#include "llvm/Support/FormatVariadic.h"
#include <optional>
-using llvm::formatv;
-
namespace clang {
namespace ento {
@@ -77,13 +75,14 @@ class SizeUnit {
std::string asExtentDesc(bool ForceBytes) const {
if (ForceBytes || isBytes())
return "the extent of";
- return formatv("the number of '{0}' elements in", AsType.getAsString());
+ return llvm::formatv("the number of '{0}' elements in",
+ AsType.getAsString());
}
std::string asElementName(bool ForceBytes) const {
if (ForceBytes || isBytes())
return "byte";
- return formatv("'{0}' element", AsType.getAsString());
+ return llvm::formatv("'{0}' element", AsType.getAsString());
}
};
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
index dbdccf516517a..df42581683b16 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
@@ -19,6 +19,8 @@
#include "clang/StaticAnalyzer/Checkers/BoundsChecking.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
+using llvm::formatv;
+
namespace clang {
namespace ento {
>From 132e445bb17ca8fe0f2244f0c572c5e4bd9890ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 8 Jun 2026 16:43:21 +0200
Subject: [PATCH 12/15] Remove barely used 'using taint;'
---
clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
index a3b68854bef5c..bd6936599b0ce 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp
@@ -23,7 +23,6 @@
using namespace clang;
using namespace ento;
-using namespace taint;
namespace {
// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt`
@@ -199,7 +198,7 @@ void ArrayBoundChecker::handleAccessExpr(const Expr *E,
// nicer to say "tainted index".
const char *OffsetName = "offset";
if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
- if (isTainted(State, ASE->getIdx(), C.getStackFrame()))
+ if (taint::isTainted(State, ASE->getIdx(), C.getStackFrame()))
OffsetName = "index";
Messages Msgs =
@@ -237,7 +236,7 @@ void ArrayBoundChecker::markPartsInteresting(PathSensitiveBugReport &BR,
// offset, then put interestingness onto symbols that could be the origin
// of the taint. Note that this may find symbols that did not appear in
// `Sym->symbols()` (because they're only loosely connected to `Val`).
- for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val))
+ for (SymbolRef Sym : taint::getTaintedSymbols(ErrorState, Val))
BR.markInteresting(Sym);
}
}
>From 563e954657baf79d1a7431478a7a375b735c4281 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 8 Jun 2026 16:49:53 +0200
Subject: [PATCH 13/15] Capitalize variable names in moved code
---
.../Checkers/BoundsChecking.cpp | 29 +++++++++----------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
index df42581683b16..64b558e7157c2 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
@@ -64,34 +64,33 @@ getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) {
// but can produce a true result when evaluated by `evalBinOp` (which follows
// the rules of C++ and casts -1 to SIZE_MAX).
static std::pair<NonLoc, nonloc::ConcreteInt>
-getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
- SValBuilder &svalBuilder) {
- const llvm::APSInt &extentVal = extent.getValue();
- std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
+getSimplifiedOffsets(NonLoc Offset, nonloc::ConcreteInt Extent,
+ SValBuilder &SVB) {
+ const llvm::APSInt &ExtentVal = Extent.getValue();
+ std::optional<nonloc::SymbolVal> SymVal = Offset.getAs<nonloc::SymbolVal>();
if (SymVal && SymVal->isExpression()) {
if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
- llvm::APSInt constant = APSIntType(extentVal).convert(SIE->getRHS());
+ llvm::APSInt Constant = APSIntType(ExtentVal).convert(SIE->getRHS());
switch (SIE->getOpcode()) {
case BO_Mul:
- // The constant should never be 0 here, becasue multiplication by zero
+ // The Constant should never be 0 here, becasue multiplication by zero
// is simplified by the engine.
- if ((extentVal % constant) != 0)
- return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+ if ((ExtentVal % Constant) != 0)
+ return std::pair<NonLoc, nonloc::ConcreteInt>(Offset, Extent);
else
- return getSimplifiedOffsets(
- nonloc::SymbolVal(SIE->getLHS()),
- svalBuilder.makeIntVal(extentVal / constant), svalBuilder);
+ return getSimplifiedOffsets(nonloc::SymbolVal(SIE->getLHS()),
+ SVB.makeIntVal(ExtentVal / Constant),
+ SVB);
case BO_Add:
- return getSimplifiedOffsets(
- nonloc::SymbolVal(SIE->getLHS()),
- svalBuilder.makeIntVal(extentVal - constant), svalBuilder);
+ return getSimplifiedOffsets(nonloc::SymbolVal(SIE->getLHS()),
+ SVB.makeIntVal(ExtentVal - Constant), SVB);
default:
break;
}
}
}
- return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
+ return std::pair<NonLoc, nonloc::ConcreteInt>(Offset, Extent);
}
static bool isNegative(SValBuilder &SVB, ProgramStateRef State, NonLoc Value) {
>From 4103833e30b847513aa181972ce5d7ee09319c0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 8 Jun 2026 17:14:42 +0200
Subject: [PATCH 14/15] Remove the ForceBytes hack
---
.../StaticAnalyzer/Checkers/BoundsChecking.h | 19 +++++++---
.../Checkers/BoundsChecking.cpp | 36 +++++++++----------
2 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
index e873210cdaca2..9d86f7314ca63 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
+++ b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
@@ -72,18 +72,29 @@ class SizeUnit {
int64_t asCharUnits() const { return AsCharUnits; }
- std::string asExtentDesc(bool ForceBytes) const {
- if (ForceBytes || isBytes())
+ std::string asExtentDesc() const {
+ if (isBytes())
return "the extent of";
return llvm::formatv("the number of '{0}' elements in",
AsType.getAsString());
}
- std::string asElementName(bool ForceBytes) const {
- if (ForceBytes || isBytes())
+ std::string asElementName() const {
+ if (isBytes())
return "byte";
return llvm::formatv("'{0}' element", AsType.getAsString());
}
+
+ std::string getOffsetName() const {
+ return isBytes() ? "byte offset" : "index";
+ }
+
+ /// Try to divide `Val1` and `Val2` (in place) by `this->asCharUnits()` and
+ /// return true if it can be performed without remainder. The values `Val1`
+ /// and `Val2` may be nullopt and in that case the corresponding division is
+ /// considered to be successful.
+ bool tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+ std::optional<int64_t> &Val2) const;
};
struct Messages {
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
index 64b558e7157c2..9739c6ffa6e65 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
@@ -200,18 +200,18 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
/// it can be performed (`Divisor` is nonzero and there is no remainder). The
/// values `Val1` and `Val2` may be nullopt and in that case the corresponding
/// division is considered to be successful.
-static bool tryDividePair(std::optional<int64_t> &Val1,
- std::optional<int64_t> &Val2, int64_t Divisor) {
- if (!Divisor)
+bool SizeUnit::tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+ std::optional<int64_t> &Val2) const {
+ if (!AsCharUnits)
return false;
- const bool Val1HasRemainder = Val1 && *Val1 % Divisor;
- const bool Val2HasRemainder = Val2 && *Val2 % Divisor;
+ const bool Val1HasRemainder = Val1 && *Val1 % AsCharUnits;
+ const bool Val2HasRemainder = Val2 && *Val2 % AsCharUnits;
if (Val1HasRemainder || Val2HasRemainder)
return false;
if (Val1)
- *Val1 /= Divisor;
+ *Val1 /= AsCharUnits;
if (Val2)
- *Val2 /= Divisor;
+ *Val2 /= AsCharUnits;
return true;
}
@@ -224,27 +224,27 @@ Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
if (Problem == BadOffsetKind::Negative)
ExtentN = std::nullopt;
- bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
- const char *OffsetOrIndex = UseByteOffsets ? "byte offset" : "index";
+ if (!SU.tryConvertValuesFromBytes(OffsetN, ExtentN))
+ SU = SizeUnit::bytes();
SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of ";
- if (OffsetN && !ExtentN && !UseByteOffsets) {
+ if (OffsetN && !ExtentN && !SU.isBytes()) {
// If the offset is reported as an index, then the report must mention the
// element type (because it is not always clear from the code). It's more
// natural to mention the element type later where the extent is described,
// but if the extent is unknown/irrelevant, then the element type can be
// inserted into the message at this point.
- Out << SU.asElementName(/*ForceBytes=*/false) << " in ";
+ Out << SU.asElementName() << " in ";
}
Out << RegName << " at ";
if (OffsetN) {
if (Problem == BadOffsetKind::Negative)
Out << "negative ";
- Out << OffsetOrIndex << " " << *OffsetN;
+ Out << SU.getOffsetName() << " " << *OffsetN;
} else {
- Out << asAdjective(Problem) << " " << OffsetOrIndex;
+ Out << asAdjective(Problem) << " " << SU.getOffsetName();
}
if (ExtentN) {
Out << ", while it holds only ";
@@ -253,7 +253,7 @@ Messages getNonTaintMsgs(std::string RegName, SizeUnit SU, NonLoc Offset,
else
Out << "a single";
- Out << ' ' << SU.asElementName(/*ForceBytes=*/UseByteOffsets);
+ Out << ' ' << SU.asElementName();
if (*ExtentN > 1)
Out << "s";
@@ -292,13 +292,13 @@ std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);
- const bool UseIndex =
- !SU.isBytes() && tryDividePair(OffsetN, ExtentN, SU.asCharUnits());
+ if (!SU.tryConvertValuesFromBytes(OffsetN, ExtentN))
+ SU = SizeUnit::bytes();
SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Assuming ";
- if (UseIndex) {
+ if (!SU.isBytes()) {
Out << "index ";
if (OffsetN)
Out << "'" << OffsetN << "' ";
@@ -320,7 +320,7 @@ std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
Out << " less than ";
if (ExtentN)
Out << *ExtentN << ", ";
- Out << SU.asExtentDesc(/*ForceBytes=*/!UseIndex) << ' ' << RegName;
+ Out << SU.asExtentDesc() << ' ' << RegName;
}
return std::string(Out.str());
}
>From 68286cba24b150c6b64527c756836dde8c5dc783 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Mon, 8 Jun 2026 17:22:01 +0200
Subject: [PATCH 15/15] Simplify providesInformationAboutInteresting
---
.../clang/StaticAnalyzer/Checkers/BoundsChecking.h | 14 +++++---------
.../lib/StaticAnalyzer/Checkers/BoundsChecking.cpp | 3 ++-
2 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
index 9d86f7314ca63..e6c07c06fc010 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
+++ b/clang/include/clang/StaticAnalyzer/Checkers/BoundsChecking.h
@@ -175,10 +175,10 @@ class BoundsCheckResult {
SizeUnit SU) const;
private:
- /// Return true if information about the value of `Sym` can put constraints
- /// on some symbol which is interesting within the bug report `BR`.
- /// In particular, this returns true when `Sym` is interesting within `BR`;
- /// but it also returns true if `Sym` is an expression that contains integer
+ /// Return true if information about the symbol behind `SV` can constrain
+ /// some symbol which is interesting within the bug report `BR`.
+ /// In particular, this returns true when `SV` is interesting within `BR`;
+ /// but it also returns true if `SV` is an expression that contains integer
/// constants and a single symbolic operand which is interesting (in `BR`).
/// We need to use this instead of plain `BR.isInteresting()` because if we
/// are analyzing code like
@@ -189,12 +189,8 @@ class BoundsCheckResult {
/// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not
/// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough
/// to detect this out of bounds access).
- static bool providesInformationAboutInteresting(SymbolRef Sym,
- PathSensitiveBugReport &BR);
static bool providesInformationAboutInteresting(SVal SV,
- PathSensitiveBugReport &BR) {
- return providesInformationAboutInteresting(SV.getAsSymbol(), BR);
- }
+ PathSensitiveBugReport &BR);
};
BoundsCheckResult checkBounds(ProgramStateRef State, SValBuilder &SVB,
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
index 9739c6ffa6e65..49e8ee9e6be16 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsChecking.cpp
@@ -326,7 +326,8 @@ std::string BoundsCheckResult::getMessage(PathSensitiveBugReport &BR,
}
bool BoundsCheckResult::providesInformationAboutInteresting(
- SymbolRef Sym, PathSensitiveBugReport &BR) {
+ SVal SV, PathSensitiveBugReport &BR) {
+ SymbolRef Sym = SV.getAsSymbol();
if (!Sym)
return false;
for (SymbolRef PartSym : Sym->symbols()) {
More information about the cfe-commits
mailing list