[clang] 2bc38dc - [analyzer] Improve bug report hashing, merge similar reports (#98621)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 22 02:44:24 PDT 2024
Author: DonĂ¡t Nagy
Date: 2024-07-22T11:44:20+02:00
New Revision: 2bc38dc30bc9baad610925d7e90e724a9d09ee7d
URL: https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d
DIFF: https://github.com/llvm/llvm-project/commit/2bc38dc30bc9baad610925d7e90e724a9d09ee7d.diff
LOG: [analyzer] Improve bug report hashing, merge similar reports (#98621)
Previously there were certain situations where
alpha.security.ArrayBoundV2 produced lots of very similar and redundant
reports that only differed in their full `Description` that contained
the (negative) byte offset value. (See
https://github.com/llvm/llvm-project/issues/86969 for details.)
This change updates the `Profile()` method of `PathSensitiveBugReport`
to ensure that it uses `getShortDescription()` instead of the full
`Description` so the standard report deduplication eliminates most of
these redundant reports.
Note that the effects of this change are very limited because there are
very few checkers that specify a separate short description, and so
`getShortDescription()` practically always defaults to returning the
full `Description`.
For the sake of consistency `BasicBugReport::Profile()` is also updated
to use the short description. (Right now there are no checkers that use
`BasicBugReport` with separate long and short descriptions.)
This commit also includes some small code quality improvements in
`ArrayBoundV2` that are IMO too trivial to be moved into a separate
commit.
Added:
Modified:
clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
clang/lib/StaticAnalyzer/Core/BugReporter.cpp
clang/test/Analysis/out-of-bounds-diagnostics.c
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index f82288f1099e8..3f837564cf47c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -373,14 +373,14 @@ static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) {
}
static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) {
- std::string RegName = getRegionName(Region);
- SmallString<128> Buf;
- llvm::raw_svector_ostream Out(Buf);
- Out << "Access of " << RegName << " at negative byte offset";
- if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
- Out << ' ' << ConcreteIdx->getValue();
- return {formatv("Out of bound access to memory preceding {0}", RegName),
- std::string(Buf)};
+ std::string RegName = getRegionName(Region), OffsetStr = "";
+
+ if (auto ConcreteOffset = getConcreteValue(Offset))
+ OffsetStr = formatv(" {0}", ConcreteOffset);
+
+ return {
+ formatv("Out of bound access to memory preceding {0}", RegName),
+ formatv("Access of {0} at negative byte offset{1}", RegName, OffsetStr)};
}
/// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if
@@ -609,7 +609,7 @@ void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const {
// CHECK UPPER BOUND
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
if (auto KnownSize = Size.getAs<NonLoc>()) {
- // In a situation where both overflow and overflow are possible (but the
+ // In a situation where both underflow and overflow are possible (but the
// index is either tainted or known to be invalid), the logic of this
// checker will first assume that the offset is non-negative, and then
// (with this additional assumption) it will detect an overflow error.
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
index e2002bfbe594a..d73dc40cf03fb 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2198,7 +2198,7 @@ const Decl *PathSensitiveBugReport::getDeclWithIssue() const {
void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const {
hash.AddInteger(static_cast<int>(getKind()));
hash.AddPointer(&BT);
- hash.AddString(Description);
+ hash.AddString(getShortDescription());
assert(Location.isValid());
Location.Profile(hash);
@@ -2213,7 +2213,7 @@ void BasicBugReport::Profile(llvm::FoldingSetNodeID& hash) const {
void PathSensitiveBugReport::Profile(llvm::FoldingSetNodeID &hash) const {
hash.AddInteger(static_cast<int>(getKind()));
hash.AddPointer(&BT);
- hash.AddString(Description);
+ hash.AddString(getShortDescription());
PathDiagnosticLocation UL = getUniqueingLocation();
if (UL.isValid()) {
UL.Profile(hash);
diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c
index 92f983d8b1561..de70e483add1c 100644
--- a/clang/test/Analysis/out-of-bounds-diagnostics.c
+++ b/clang/test/Analysis/out-of-bounds-diagnostics.c
@@ -17,6 +17,27 @@ int underflowWithDeref(void) {
// expected-note at -2 {{Access of 'TenElements' at negative byte offset -4}}
}
+int rng(void);
+int getIndex(void) {
+ switch (rng()) {
+ case 1: return -152;
+ case 2: return -160;
+ case 3: return -168;
+ default: return -172;
+ }
+}
+
+void gh86959(void) {
+ // Previously code like this produced many almost-identical bug reports that
+ // only
diff ered in the offset value. Verify that now we only see one report.
+
+ // expected-note at +1 {{Entering loop body}}
+ while (rng())
+ TenElements[getIndex()] = 10;
+ // expected-warning at -1 {{Out of bound access to memory preceding 'TenElements'}}
+ // expected-note at -2 {{Access of 'TenElements' at negative byte offset -688}}
+}
+
int scanf(const char *restrict fmt, ...);
void taintedIndex(void) {
More information about the cfe-commits
mailing list