[clang] 1bd2d33 - [analyzer][CStringChecker] Adjust the invalidation operation on the super region of the destination buffer during string copy
Ella Ma via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 3 01:15:13 PDT 2023
Author: Ella Ma
Date: 2023-07-03T16:13:47+08:00
New Revision: 1bd2d335b649f2e09d7e4bdd0b92c78489ded022
URL: https://github.com/llvm/llvm-project/commit/1bd2d335b649f2e09d7e4bdd0b92c78489ded022
DIFF: https://github.com/llvm/llvm-project/commit/1bd2d335b649f2e09d7e4bdd0b92c78489ded022.diff
LOG: [analyzer][CStringChecker] Adjust the invalidation operation on the super region of the destination buffer during string copy
Fixing GitHub issue: https://github.com/llvm/llvm-project/issues/55019
Following the previous fix https://reviews.llvm.org/D12571 on issue https://github.com/llvm/llvm-project/issues/23328
The two issues report false memory leaks after calling string-copy APIs with a buffer field in an object as the destination.
The buffer invalidation incorrectly drops the assignment to a heap memory block when no overflow problems happen.
And the pointer of the dropped assignment is declared in the same object of the destination buffer.
The previous fix only considers the `memcpy` functions whose copy length is available from arguments.
In this issue, the copy length is inferable from the buffer declaration and string literals being copied.
Therefore, I have adjusted the previous fix to reuse the copy length computed before.
Besides, for APIs that never overflow (strsep) or we never know whether they can overflow (std::copy),
new invalidation operations have been introduced to inform CStringChecker::InvalidateBuffer whether or not to
invalidate the super region that encompasses the destination buffer.
Reviewed By: steakhal
Differential Revision: https://reviews.llvm.org/D152435
Added:
clang/test/Analysis/issue-55019.cpp
Modified:
clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
clang/test/Analysis/Inputs/system-header-simulator.h
clang/test/Analysis/pr22954.c
Removed:
################################################################################
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 01a35505a90a2c..af9cf1443bacd3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -260,11 +260,34 @@ class CStringChecker : public Checker< eval::Call,
const Expr *expr,
SVal val) const;
- static ProgramStateRef InvalidateBuffer(CheckerContext &C,
- ProgramStateRef state,
- const Expr *Ex, SVal V,
- bool IsSourceBuffer,
- const Expr *Size);
+ /// Invalidate the destination buffer determined by characters copied.
+ static ProgramStateRef
+ invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
+ const Expr *BufE, SVal BufV, SVal SizeV,
+ QualType SizeTy);
+
+ /// Operation never overflows, do not invalidate the super region.
+ static ProgramStateRef invalidateDestinationBufferNeverOverflows(
+ CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
+
+ /// We do not know whether the operation can overflow (e.g. size is unknown),
+ /// invalidate the super region and escape related pointers.
+ static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
+ CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);
+
+ /// Invalidate the source buffer for escaping pointers.
+ static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
+ ProgramStateRef S,
+ const Expr *BufE, SVal BufV);
+
+ /// @param InvalidationTraitOperations Determine how to invlidate the
+ /// MemRegion by setting the invalidation traits. Return true to cause pointer
+ /// escape, or false otherwise.
+ static ProgramStateRef invalidateBufferAux(
+ CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
+ llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
+ const MemRegion *)>
+ InvalidationTraitOperations);
static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
const MemRegion *MR);
@@ -310,10 +333,9 @@ class CStringChecker : public Checker< eval::Call,
// Return true if the destination buffer of the copy function may be in bound.
// Expects SVal of Size to be positive and unsigned.
// Expects SVal of FirstBuf to be a FieldRegion.
- static bool IsFirstBufInBound(CheckerContext &C,
- ProgramStateRef state,
- const Expr *FirstBuf,
- const Expr *Size);
+ static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
+ SVal BufVal, QualType BufTy, SVal LengthVal,
+ QualType LengthTy);
};
} //end anonymous namespace
@@ -967,43 +989,40 @@ const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
return strRegion->getStringLiteral();
}
-bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
- ProgramStateRef state,
- const Expr *FirstBuf,
- const Expr *Size) {
+bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
+ SVal BufVal, QualType BufTy,
+ SVal LengthVal, QualType LengthTy) {
// If we do not know that the buffer is long enough we return 'true'.
// Otherwise the parent region of this field region would also get
// invalidated, which would lead to warnings based on an unknown state.
+ if (LengthVal.isUnknown())
+ return false;
+
// Originally copied from CheckBufferAccess and CheckLocation.
- SValBuilder &svalBuilder = C.getSValBuilder();
- ASTContext &Ctx = svalBuilder.getContext();
- const LocationContext *LCtx = C.getLocationContext();
+ SValBuilder &SB = C.getSValBuilder();
+ ASTContext &Ctx = C.getASTContext();
- QualType sizeTy = Size->getType();
QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
- SVal BufVal = state->getSVal(FirstBuf, LCtx);
- SVal LengthVal = state->getSVal(Size, LCtx);
std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
if (!Length)
return true; // cf top comment.
// Compute the offset of the last element to be accessed: size-1.
- NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
- SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy);
+ NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
+ SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
if (Offset.isUnknown())
return true; // cf top comment
NonLoc LastOffset = Offset.castAs<NonLoc>();
// Check that the first buffer is sufficiently long.
- SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
+ SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
if (!BufLoc)
return true; // cf top comment.
- SVal BufEnd =
- svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy);
+ SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
// Check for out of bound array element access.
const MemRegion *R = BufEnd.getAsRegion();
@@ -1017,28 +1036,90 @@ bool CStringChecker::IsFirstBufInBound(CheckerContext &C,
// FIXME: Does this crash when a non-standard definition
// of a library function is encountered?
assert(ER->getValueType() == C.getASTContext().CharTy &&
- "IsFirstBufInBound should only be called with char* ElementRegions");
+ "isFirstBufInBound should only be called with char* ElementRegions");
// Get the size of the array.
const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
- DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder);
+ DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
// Get the index of the accessed element.
DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
- ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true);
+ ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
return static_cast<bool>(StInBound);
}
-ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
- ProgramStateRef state,
- const Expr *E, SVal V,
- bool IsSourceBuffer,
- const Expr *Size) {
+ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
+ CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
+ SVal SizeV, QualType SizeTy) {
+ auto InvalidationTraitOperations =
+ [&C, S, BufTy = BufE->getType(), BufV, SizeV,
+ SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
+ // If destination buffer is a field region and access is in bound, do
+ // not invalidate its super region.
+ if (MemRegion::FieldRegionKind == R->getKind() &&
+ isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
+ ITraits.setTrait(
+ R,
+ RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
+ }
+ return false;
+ };
+
+ return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
+}
+
+ProgramStateRef
+CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
+ CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
+ auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
+ const MemRegion *R) {
+ return isa<FieldRegion>(R);
+ };
+
+ return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
+}
+
+ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
+ CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
+ auto InvalidationTraitOperations =
+ [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
+ if (MemRegion::FieldRegionKind == R->getKind())
+ ITraits.setTrait(
+ R,
+ RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
+ return false;
+ };
+
+ return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
+}
+
+ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
+ ProgramStateRef S,
+ const Expr *BufE,
+ SVal BufV) {
+ auto InvalidationTraitOperations =
+ [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
+ ITraits.setTrait(
+ R->getBaseRegion(),
+ RegionAndSymbolInvalidationTraits::TK_PreserveContents);
+ ITraits.setTrait(R,
+ RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
+ return true;
+ };
+
+ return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
+}
+
+ProgramStateRef CStringChecker::invalidateBufferAux(
+ CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
+ llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
+ const MemRegion *)>
+ InvalidationTraitOperations) {
std::optional<Loc> L = V.getAs<Loc>();
if (!L)
- return state;
+ return State;
// FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
// some assumptions about the value that CFRefCount can't. Even so, it should
@@ -1055,29 +1136,10 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
// Invalidate this region.
const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
-
- bool CausesPointerEscape = false;
RegionAndSymbolInvalidationTraits ITraits;
- // Invalidate and escape only indirect regions accessible through the source
- // buffer.
- if (IsSourceBuffer) {
- ITraits.setTrait(R->getBaseRegion(),
- RegionAndSymbolInvalidationTraits::TK_PreserveContents);
- ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
- CausesPointerEscape = true;
- } else {
- const MemRegion::Kind& K = R->getKind();
- if (K == MemRegion::FieldRegionKind)
- if (Size && IsFirstBufInBound(C, state, E, Size)) {
- // If destination buffer is a field region and access is in bound,
- // do not invalidate its super region.
- ITraits.setTrait(
- R,
- RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
- }
- }
+ bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
- return state->invalidateRegions(R, E, C.blockCount(), LCtx,
+ return State->invalidateRegions(R, E, C.blockCount(), LCtx,
CausesPointerEscape, nullptr, nullptr,
&ITraits);
}
@@ -1085,7 +1147,7 @@ ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
// If we have a non-region value by chance, just remove the binding.
// FIXME: is this necessary or correct? This handles the non-Region
// cases. Is it ever valid to store to these?
- return state->killBinding(*L);
+ return State->killBinding(*L);
}
bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
@@ -1182,8 +1244,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
} else {
// If the destination buffer's extent is not equal to the value of
// third argument, just invalidate buffer.
- State = InvalidateBuffer(C, State, DstBuffer, MemVal,
- /*IsSourceBuffer*/ false, Size);
+ State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
+ SizeVal, Size->getType());
}
if (StateNullChar && !StateNonNullChar) {
@@ -1208,8 +1270,8 @@ bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
} else {
// If the offset is not zero and char value is not concrete, we can do
// nothing but invalidate the buffer.
- State = InvalidateBuffer(C, State, DstBuffer, MemVal,
- /*IsSourceBuffer*/ false, Size);
+ State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
+ SizeVal, Size->getType());
}
return true;
}
@@ -1305,15 +1367,14 @@ void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
// can use LazyCompoundVals to copy the source values into the destination.
// This would probably remove any existing bindings past the end of the
// copied region, but that's still an improvement over blank invalidation.
- state =
- InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression),
- /*IsSourceBuffer*/ false, Size.Expression);
+ state = invalidateDestinationBufferBySize(
+ C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
+ Size.Expression->getType());
// Invalidate the source (const-invalidation without const-pointer-escaping
// the address of the top-level region).
- state = InvalidateBuffer(C, state, Source.Expression,
- C.getSVal(Source.Expression),
- /*IsSourceBuffer*/ true, nullptr);
+ state = invalidateSourceBuffer(C, state, Source.Expression,
+ C.getSVal(Source.Expression));
C.addTransition(state);
}
@@ -1985,13 +2046,13 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
// can use LazyCompoundVals to copy the source values into the destination.
// This would probably remove any existing bindings past the end of the
// string, but that's still an improvement over blank invalidation.
- state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal,
- /*IsSourceBuffer*/ false, nullptr);
+ state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
+ *dstRegVal, amountCopied,
+ C.getASTContext().getSizeType());
// Invalidate the source (const-invalidation without const-pointer-escaping
// the address of the top-level region).
- state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal,
- /*IsSourceBuffer*/ true, nullptr);
+ state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
// Set the C string length of the destination, if we know it.
if (IsBounded && (appendK == ConcatFnKind::none)) {
@@ -2206,8 +2267,9 @@ void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
// Invalidate the search string, representing the change of one delimiter
// character to NUL.
- State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result,
- /*IsSourceBuffer*/ false, nullptr);
+ // As the replacement never overflows, do not invalidate its super region.
+ State = invalidateDestinationBufferNeverOverflows(
+ C, State, SearchStrPtr.Expression, Result);
// Overwrite the search string pointer. The new value is either an address
// further along in the same string, or NULL if there are no more tokens.
@@ -2256,8 +2318,10 @@ void CStringChecker::evalStdCopyCommon(CheckerContext &C,
// Invalidate the destination buffer
const Expr *Dst = CE->getArg(2);
SVal DstVal = State->getSVal(Dst, LCtx);
- State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
- /*Size=*/nullptr);
+ // FIXME: As we do not know how many items are copied, we also invalidate the
+ // super region containing the target location.
+ State =
+ invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
SValBuilder &SVB = C.getSValBuilder();
diff --git a/clang/test/Analysis/Inputs/system-header-simulator.h b/clang/test/Analysis/Inputs/system-header-simulator.h
index fb86c2f8ba6974..8924103f5046ea 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator.h
@@ -63,7 +63,9 @@ size_t strlen(const char *);
char *strcpy(char *restrict, const char *restrict);
char *strncpy(char *dst, const char *src, size_t n);
+char *strsep(char **stringp, const char *delim);
void *memcpy(void *dst, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
typedef unsigned long __darwin_pthread_key_t;
typedef __darwin_pthread_key_t pthread_key_t;
diff --git a/clang/test/Analysis/issue-55019.cpp b/clang/test/Analysis/issue-55019.cpp
new file mode 100644
index 00000000000000..dfeb00af3e47ed
--- /dev/null
+++ b/clang/test/Analysis/issue-55019.cpp
@@ -0,0 +1,89 @@
+// Refer issue 55019 for more details.
+// A supplemental test case of pr22954.c for other functions modeled in
+// the CStringChecker.
+
+// RUN: %clang_analyze_cc1 %s -verify \
+// RUN: -analyzer-checker=core \
+// RUN: -analyzer-checker=unix \
+// RUN: -analyzer-checker=debug.ExprInspection
+
+#include "Inputs/system-header-simulator.h"
+#include "Inputs/system-header-simulator-cxx.h"
+
+void *malloc(size_t);
+void free(void *);
+
+struct mystruct {
+ void *ptr;
+ char arr[4];
+};
+
+void clang_analyzer_dump(const void *);
+
+// CStringChecker::memsetAux
+void fmemset() {
+ mystruct x;
+ x.ptr = malloc(1);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ memset(x.arr, 0, sizeof(x.arr));
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ free(x.ptr); // no-leak-warning
+}
+
+// CStringChecker::evalCopyCommon
+void fmemcpy() {
+ mystruct x;
+ x.ptr = malloc(1);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ memcpy(x.arr, "hi", 2);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ free(x.ptr); // no-leak-warning
+}
+
+// CStringChecker::evalStrcpyCommon
+void fstrcpy() {
+ mystruct x;
+ x.ptr = malloc(1);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ strcpy(x.arr, "hi");
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ free(x.ptr); // no-leak-warning
+}
+
+void fstrncpy() {
+ mystruct x;
+ x.ptr = malloc(1);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ strncpy(x.arr, "hi", sizeof(x.arr));
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ free(x.ptr); // no-leak-warning
+}
+
+// CStringChecker::evalStrsep
+void fstrsep() {
+ mystruct x;
+ x.ptr = malloc(1);
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ char *p = x.arr;
+ (void)strsep(&p, "x");
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+ free(x.ptr); // no-leak-warning
+}
+
+// CStringChecker::evalStdCopyCommon
+void fstdcopy() {
+ mystruct x;
+ x.ptr = new char;
+ clang_analyzer_dump(x.ptr); // expected-warning {{HeapSymRegion}}
+
+ const char *p = "x";
+ std::copy(p, p + 1, x.arr);
+
+ // FIXME: As we currently cannot know whether the copy overflows, the checker
+ // invalidates the entire `x` object. When the copy size through iterators
+ // can be correctly modeled, we can then update the verify direction from
+ // SymRegion to HeapSymRegion as this std::copy call never overflows and
+ // hence the pointer `x.ptr` shall not be invalidated.
+ clang_analyzer_dump(x.ptr); // expected-warning {{SymRegion}}
+ delete static_cast<char*>(x.ptr); // no-leak-warning
+}
diff --git a/clang/test/Analysis/pr22954.c b/clang/test/Analysis/pr22954.c
index e9b8884b08a407..3d1cac19720664 100644
--- a/clang/test/Analysis/pr22954.c
+++ b/clang/test/Analysis/pr22954.c
@@ -556,12 +556,13 @@ int f263(int n, char * len) {
x263.s2 = strdup("hello");
char input[] = {'a', 'b', 'c', 'd'};
memcpy(x263.s1, input, *(len + n));
- clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}}
+ clang_analyzer_eval(x263.s1[0] == 0); // expected-warning{{UNKNOWN}}\
+ expected-warning{{Potential leak of memory pointed to by 'x263.s2'}}
clang_analyzer_eval(x263.s1[1] == 0); // expected-warning{{UNKNOWN}}
clang_analyzer_eval(x263.s1[2] == 0); // expected-warning{{UNKNOWN}}
clang_analyzer_eval(x263.s1[3] == 0); // expected-warning{{UNKNOWN}}
clang_analyzer_eval(x263.s2 == 0); // expected-warning{{UNKNOWN}}
- return 0; // expected-warning{{Potential leak of memory pointed to by 'x263.s2'}}
+ return 0;
}
More information about the cfe-commits
mailing list