[clang] [analyzer] Refine invalidation caused by `fread` (PR #93408)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Sun May 26 03:25:22 PDT 2024
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/93408
This change enables more accurate modeling of the write effects of `fread`. In particular, instead of invalidating the whole buffer, on a best-effort basis, we try to invalidate only the elements of the buffer that are actually accessed. This preserves the previous values of the unaffected buffer slots. As a result, the analyzer can, for example, diagnose more uses of uninitialized buffer elements.
Currently, this refined invalidation triggers for `fread` if and only if the `count` parameter and the buffer pointer's index component are concrete or perfectly-constrained symbols.
Additionally, if the `fread` would read more than 64 elements, the whole buffer is invalidated as before. This is a safeguard against performance issues.
Refer to the comments of the assertions in the following example to see the changes in the diagnostics:
```c++
void demo() {
FILE *fp = fopen("/home/test", "rb+");
if (!fp) return;
int buffer[10]; // uninitialized
int read_items = fread(buffer+1, sizeof(int), 5, fp);
if (5 == read_items) {
int v1 = buffer[1]; // Unknown value but not garbage.
clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch.
clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now.
int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
(void)(v1 + v0);
} else {
// If 'fread' had an error.
int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
(void)v0;
}
fclose(fp);
}
```
[CPP-3247](https://sonarsource.atlassian.net/browse/CPP-3247)
Patch by Marco Borgeaud (marco-antognini-sonarsource)
>From f9e841ddaa865d529c806b2d115d5ddbc7109243 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sun, 26 May 2024 11:40:01 +0200
Subject: [PATCH] [analyzer] Refine invalidation caused by `fread`
This change enables more accurate modeling of the write effects of `fread`.
In particular, instead of invalidating the whole buffer, on a best-effort
basis, we try to invalidate only the elements of the buffer that are actually
accessed. This preserves the previous values of the unaffected buffer slots.
As a result, the analyzer can, for example, diagnose more uses of
uninitialized buffer elements.
Currently, this refined invalidation triggers for `fread` if and only if
the `count` parameter and the buffer pointer's index component are
concrete or perfectly-constrained symbols.
Additionally, if the `fread` would read more than 64 elements, the whole
buffer is invalidated as before. This is a safeguard against performance
issues.
Refer to the comments of the assertions in the following example to see
the changes in the diagnostics:
```c++
void demo() {
FILE *fp = fopen("/home/test", "rb+");
if (!fp) return;
int buffer[10]; // uninitialized
int read_items = fread(buffer+1, sizeof(int), 5, fp);
if (5 == read_items) {
int v1 = buffer[1]; // Unknown value but not garbage.
clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch.
clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now.
int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
(void)(v1 + v0);
} else {
// If 'fread' had an error.
int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
(void)v0;
}
fclose(fp);
}
```
[CPP-3247](https://sonarsource.atlassian.net/browse/CPP-3247)
Patch by Marco Borgeaud (marco-antognini-sonarsource)
---
.../StaticAnalyzer/Checkers/StreamChecker.cpp | 88 ++++-
clang/test/Analysis/fread.cpp | 328 ++++++++++++++++++
2 files changed, 405 insertions(+), 11 deletions(-)
create mode 100644 clang/test/Analysis/fread.cpp
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index d4e020f7a72a0..7b42c4f72b322 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -717,18 +717,71 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N,
return nullptr;
}
+/// Invalidate only the elements [StartIndex, StartIndex + Count) of
+/// \p Buffer; a refinement of 'escapeArgs' and plain 'invalidateRegions'.
+/// If \p StartIndex or \p Count is not concrete or perfectly-constrained,
+/// or \p Count exceeds the element limit, the whole \p Buffer is invalidated.
+/// Returns \p State unchanged if \p Buffer is null or not a 'SubRegion'.
+static ProgramStateRef
+escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
+ const CallEvent &Call, const MemRegion *Buffer,
+ QualType ElemType, SVal StartIndex, SVal Count) {
+ if (!llvm::isa_and_nonnull<SubRegion>(Buffer))
+ return State;
+
+ auto UnboxAsInt = [&C, &State](SVal V) -> std::optional<int64_t> {
+ auto &SVB = C.getSValBuilder();
+ if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
+ return Int->tryExtValue();
+ return std::nullopt;
+ };
+
+ auto StartIndexVal = UnboxAsInt(StartIndex);
+ auto CountVal = UnboxAsInt(Count);
+
+ // FIXME: Maybe we could make this more generic, and expose this by the
+ // 'invalidateRegions' API. After doing so, it might make sense to make this
+ // limit configurable.
+ constexpr int MaxInvalidatedElementsLimit = 64;
+ if (!StartIndexVal || !CountVal || *CountVal > MaxInvalidatedElementsLimit) {
+ return State->invalidateRegions({loc::MemRegionVal{Buffer}},
+ Call.getOriginExpr(), C.blockCount(),
+ C.getLocationContext(),
+ /*CausesPointerEscape=*/false);
+ }
+
+ constexpr auto DoNotInvalidateSuperRegion =
+ RegionAndSymbolInvalidationTraits::InvalidationKinds::
+ TK_DoNotInvalidateSuperRegion;
+
+ auto &RegionManager = Buffer->getMemRegionManager();
+ SmallVector<SVal> EscapingVals;
+ EscapingVals.reserve(*CountVal);
+
+ RegionAndSymbolInvalidationTraits ITraits;
+ for (auto Idx : llvm::seq(*StartIndexVal, *StartIndexVal + *CountVal)) {
+ NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
+ const auto *Element = RegionManager.getElementRegion(
+ ElemType, Index, cast<SubRegion>(Buffer), C.getASTContext());
+ EscapingVals.push_back(loc::MemRegionVal(Element));
+ ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
+ }
+ return State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+ C.blockCount(), C.getLocationContext(),
+ /*CausesPointerEscape=*/false,
+ /*InvalidatedSymbols=*/nullptr, &Call,
+ &ITraits);
+}
+
static ProgramStateRef escapeArgs(ProgramStateRef State, CheckerContext &C,
const CallEvent &Call,
ArrayRef<unsigned int> EscapingArgs) {
- const auto *CE = Call.getOriginExpr();
-
- SmallVector<SVal> EscapingVals;
- EscapingVals.reserve(EscapingArgs.size());
- for (auto EscArgIdx : EscapingArgs)
- EscapingVals.push_back(Call.getArgSVal(EscArgIdx));
- State = State->invalidateRegions(EscapingVals, CE, C.blockCount(),
- C.getLocationContext(),
- /*CausesPointerEscape=*/false);
+ auto GetArgSVal = [&Call](int Idx) { return Call.getArgSVal(Idx); };
+ auto EscapingVals = to_vector(map_range(EscapingArgs, GetArgSVal));
+ State = State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+ C.blockCount(), C.getLocationContext(),
+ /*CausesPointerEscape=*/false,
+ /*InvalidatedSymbols=*/nullptr);
return State;
}
@@ -937,8 +990,21 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
// At read, invalidate the buffer in any case of error or success,
// except if EOF was already present.
- if (IsFread && !E.isStreamEof())
- State = escapeArgs(State, C, Call, {0});
+ if (IsFread && !E.isStreamEof()) {
+ // Try to invalidate the individual elements.
+ if (const auto *BufferFirstElem =
+ dyn_cast_or_null<ElementRegion>(Call.getArgSVal(0).getAsRegion())) {
+ const MemRegion *Buffer = BufferFirstElem->getSuperRegion();
+ QualType ElemTy = BufferFirstElem->getElementType();
+ SVal FirstAccessedItem = BufferFirstElem->getIndex();
+ SVal ItemCount = Call.getArgSVal(2);
+ State = escapeByStartIndexAndCount(State, C, Call, Buffer, ElemTy,
+ FirstAccessedItem, ItemCount);
+ } else {
+ // Otherwise just fall back to invalidating the whole buffer.
+ State = escapeArgs(State, C, Call, {0});
+ }
+ }
// Generate a transition for the success state.
// If we know the state to be FEOF at fread, do not add a success state.
diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.cpp
new file mode 100644
index 0000000000000..2bf9baefe1395
--- /dev/null
+++ b/clang/test/Analysis/fread.cpp
@@ -0,0 +1,328 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN: -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN: -analyzer-checker=debug.ExprInspection
+
+#define EOF (-1)
+
+extern "C" {
+typedef __typeof(sizeof(int)) size_t;
+typedef struct _FILE FILE;
+
+FILE *fopen(const char *filename, const char *mode);
+int fclose(FILE *stream);
+size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
+int fgetc(FILE *stream);
+void *malloc(size_t size);
+}
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached();
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untracked_stream(FILE *fp) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage and not modeled by checker.
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ }
+}
+
+void fgetc_props_taint() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int c = fgetc(fp); // c is tainted.
+ if (c != EOF) {
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void fread_props_taint() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ char buffer[10];
+ int c = fread(buffer, 1, 10, fp); // c is tainted.
+ if (c != 10) {
+ // If the read failed, then the number of bytes successfully read should be tainted.
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte1() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte2(char *buffer) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (1 == fread(buffer, 1, 1, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte3(char *buffer) {
+ buffer[1] = 10;
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ // buffer[1] is not mutated by fread and remains not tainted.
+ fread(buffer, 1, 1, fp);
+ char p = buffer[1];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ clang_analyzer_dump(buffer[1]); // expected-warning{{derived_}} FIXME This should be 10.
+ fclose(fp);
+ }
+}
+
+void read_many_bytes(char *buffer) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (42 == fread(buffer, 1, 42, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void random_access_write1(int index) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long c[4];
+ bool success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+ switch (index) {
+ case 0:
+ // c[0] is not mutated by fread.
+ if (success) {
+ char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+ } else {
+ char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+ }
+ break;
+
+ case 1:
+ if (success) {
+ // Unknown value but not garbage.
+ clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+ clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+ } else {
+ // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+ clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+ }
+ break;
+
+ case 2:
+ if (success) {
+ long p = c[2]; // Unknown value but not garbage.
+ // FIXME: Taint analysis only marks the first byte of a memory region. See getPointeeOf in GenericTaintChecker.cpp.
+ clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+ clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+ } else {
+ // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: See above.
+ clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+ }
+ break;
+
+ case 3:
+ // c[3] is not mutated by fread.
+ if (success) {
+ long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+ } else {
+ long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ break;
+ }
+
+ fclose(fp);
+ }
+}
+
+void random_access_write2(bool b) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int buffer[10];
+ int *ptr = buffer + 2;
+ if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+ if (b) {
+ int p = buffer[1]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ } else {
+ int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+void random_access_write_symbolic_count(size_t count) {
+ // Cover a case that used to crash (symbolic count).
+ if (count > 2)
+ return;
+
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long c[4];
+ fread(c + 1, sizeof(long), count, fp);
+
+ // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value, the checker doesn't know that.
+ long p = c[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ p = c[3];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ p = c[1];
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ fclose(fp);
+ }
+}
+
+void dynamic_random_access_write(int startIndex) {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ long buffer[10];
+ // Cannot reason about index.
+ size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+ if (5 == res) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else if (res == 4) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 1];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 2];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 3];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 4];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 5];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ }
+ fclose(fp);
+ }
+}
+
+struct S {
+ int a;
+ long b;
+};
+
+void comopund_write1() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ }
+ fclose(fp);
+ }
+}
+
+void comopund_write2() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.a; // FIXME: This should raise an uninitialized read.
+ clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ long p = s.a; // FIXME: This should raise an uninitialized read.
+ clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ }
+ fclose(fp);
+ }
+}
+
+void var_write() {
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ int a, b; // 'a' is not touched by fread.
+ if (1 == fread(&b, sizeof(b), 1, fp)) {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ } else {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // Mirrors MaxInvalidatedElementsLimit in escapeByStartIndexAndCount (StreamChecker.cpp).
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void test_large_read() {
+ int buffer[PastMaxComplexity + 1];
+ buffer[PastMaxComplexity] = 42;
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // Unreachable.
+ }
+ if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+ }
+ }
+ fclose(fp);
+ }
+}
+
+void test_small_read() {
+ int buffer[10];
+ buffer[5] = 42;
+ if (FILE *fp = fopen("/home/test", "rb+")) {
+ clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+ if (1 == fread(buffer, sizeof(int), 5, fp)) {
+ clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+ }
+ fclose(fp);
+ }
+}
More information about the cfe-commits
mailing list