[clang] 22357fe - [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` (#146212)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 7 04:46:33 PDT 2025
Author: flovent
Date: 2025-07-07T13:46:30+02:00
New Revision: 22357fe33a8a8cc221632e32cb443676f1feeda9
URL: https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9
DIFF: https://github.com/llvm/llvm-project/commit/22357fe33a8a8cc221632e32cb443676f1feeda9.diff
LOG: [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` (#146212)
Bounded string functions takes smallest of two values as it's copy size
(`amountCopied` variable in `evalStrcpyCommon`), and it's used to
decided whether this operation will cause out-of-bound access and
invalidate it's super region if it does.
for `strlcat`: `amountCopied = min (size - dstLen - 1 , srcLen)`
for others: `amountCopied = min (srcLen, size)`
Currently when one of two values is unknown or `SValBuilder` can't
decide which one is smaller, `amountCopied` will remain `UnknownVal`,
which will invalidate copy destination's super region unconditionally.
This patch add check to see if one of these two values is definitely
in-bound, if so `amountCopied` has to be in-bound too, because it‘s less
than or equal to them, we can avoid the invalidation of super region and
some related false positives in this situation.
Note: This patch uses `size` as an approximation of `size - dstLen - 1`
in `strlcat` case because currently analyzer doesn't handle complex
expressions like this very well.
Closes #143807.
Added:
clang/test/Analysis/cstring-should-not-invalidate.cpp
Modified:
clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 4d12fdcec1f1a..31cb150892a5d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -2223,6 +2223,44 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
Result = lastElement;
}
+ // For bounded method, amountCopied take the minimum of two values,
+ // for ConcatFnKind::strlcat:
+ // amountCopied = min (size - dstLen - 1 , srcLen)
+ // for others:
+ // amountCopied = min (srcLen, size)
+ // So even if we don't know about amountCopied, as long as one of them will
+ // not cause an out-of-bound access, the whole function's operation will not
+ // too, that will avoid invalidating the superRegion of data member in that
+ // situation.
+ bool CouldAccessOutOfBound = true;
+ if (IsBounded && amountCopied.isUnknown()) {
+ auto CouldAccessOutOfBoundForSVal =
+ [&](std::optional<NonLoc> Val) -> bool {
+ if (!Val)
+ return true;
+ return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
+ Dst.Expression->getType(), *Val,
+ C.getASTContext().getSizeType());
+ };
+
+ CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
+
+ if (CouldAccessOutOfBound) {
+ // Get the max number of characters to copy.
+ const Expr *LenExpr = Call.getArgExpr(2);
+ SVal LenVal = state->getSVal(LenExpr, LCtx);
+
+ // Protect against misdeclared strncpy().
+ LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
+
+ // Because analyzer doesn't handle expressions like `size -
+ // dstLen - 1` very well, we roughly use `size` for
+ // ConcatFnKind::strlcat here, same with other concat kinds.
+ CouldAccessOutOfBound =
+ CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
+ }
+ }
+
// Invalidate the destination (regular invalidation without pointer-escaping
// the address of the top-level region). This must happen before we set the
// C string length because invalidation will clear the length.
@@ -2230,9 +2268,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
// can use LazyCompoundVals to copy the source values into the destination.
// This would probably remove any existing bindings past the end of the
// string, but that's still an improvement over blank invalidation.
- state = invalidateDestinationBufferBySize(
- C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
- amountCopied, C.getASTContext().getSizeType());
+ if (CouldAccessOutOfBound)
+ state = invalidateDestinationBufferBySize(
+ C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
+ amountCopied, C.getASTContext().getSizeType());
+ else
+ state = invalidateDestinationBufferNeverOverflows(
+ C, state, Call.getCFGElementRef(), *dstRegVal);
// Invalidate the source (const-invalidation without const-pointer-escaping
// the address of the top-level region).
diff --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp b/clang/test/Analysis/cstring-should-not-invalidate.cpp
new file mode 100644
index 0000000000000..abd4689b2f9b9
--- /dev/null
+++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp
@@ -0,0 +1,132 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -verify %s
+
+typedef decltype(sizeof(int)) size_t;
+void clang_analyzer_eval(bool);
+
+char *strncpy(char *dest, const char *src, size_t x);
+
+constexpr int initB = 100;
+struct Base {
+ int b;
+ Base(): b(initB) {}
+};
+
+// issue 143807
+struct strncpyTestClass: public Base {
+ int *m_ptr;
+ char m_buff[1000];
+
+ void KnownLen(char *src) {
+ m_ptr = new int;
+ strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+ delete m_ptr; // no warning
+ }
+
+ void KnownSrcLen(size_t n) {
+ m_ptr = new int;
+ strncpy(m_buff, "xyz", n); // known src size but unknown len
+ delete m_ptr; // no warning
+ }
+};
+
+void strncpyTest(char *src, size_t n) {
+ strncpyTestClass rep;
+ rep.KnownLen(src);
+ rep.KnownSrcLen(n);
+ clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+size_t strlcpy(char *dest, const char *src, size_t size);
+
+struct strlcpyTestClass: public Base {
+ int *m_ptr;
+ char m_buff[1000];
+
+ void KnownLen(char *src) {
+ m_ptr = new int;
+ strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+ delete m_ptr; // no warning
+ }
+
+ void KnownSrcLen(size_t n) {
+ m_ptr = new int;
+ strlcpy(m_buff, "xyz", n); // known src size but unknown len
+ delete m_ptr; // no warning
+ }
+};
+
+void strlcpyTest(char *src, size_t n) {
+ strlcpyTestClass rep;
+ rep.KnownLen(src);
+ rep.KnownSrcLen(n);
+ clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+char *strncat(char *s1, const char *s2, size_t n);
+
+struct strncatTestClass: public Base {
+ int *m_ptr;
+ char m_buff[1000];
+
+ void KnownLen(char *src) {
+ m_ptr = new int;
+ strncat(m_buff, src, sizeof(m_buff) - 1); // known len but unknown src size
+ delete m_ptr; // no warning
+ }
+
+ void KnownSrcLen(size_t n) {
+ m_ptr = new int;
+ strncat(m_buff, "xyz", n); // known src size but unknown len
+ delete m_ptr; // no warning
+ }
+};
+
+void strncatTest(char *src, size_t n) {
+ strncatTestClass rep;
+ rep.KnownLen(src);
+ rep.KnownSrcLen(n);
+ clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
+
+struct strncatReportOutOfBoundTestClass {
+ int *m_ptr;
+ char m_buff[1000];
+
+ void KnownLen(char *src) {
+ m_ptr = new int;
+ // expected-warning at +1{{String concatenation function overflows the destination buffer}}
+ strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+ delete m_ptr; // no warning
+ }
+};
+
+void strncatReportOutOfBoundTest(char *src, size_t n) {
+ strncatReportOutOfBoundTestClass rep;
+ rep.KnownLen(src);
+}
+
+size_t strlcat(char *dst, const char *src, size_t size);
+
+struct strlcatTestClass: public Base {
+ int *m_ptr;
+ char m_buff[1000];
+
+ void KnownLen(char *src) {
+ m_ptr = new int;
+ strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size
+ delete m_ptr; // no warning
+ }
+
+ void KnownSrcLen(size_t n) {
+ m_ptr = new int;
+ strlcat(m_buff, "xyz", n); // known src size but unknown len
+ delete m_ptr; // no warning
+ }
+};
+
+void strlcatTest(char *src, size_t n) {
+ strlcatTestClass rep;
+ rep.KnownLen(src);
+ rep.KnownSrcLen(n);
+ clang_analyzer_eval(rep.b == initB); // expected-warning{{TRUE}}
+}
More information about the cfe-commits
mailing list