[clang] [analyzer] Refine invalidation caused by `fread` (PR #93408)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 6 00:45:19 PDT 2024
================
@@ -0,0 +1,412 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN: -triple x86_64-linux-gnu \
+// RUN: -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN: -analyzer-checker=debug.ExprInspection
+
+#include "Inputs/system-header-simulator-for-simple-stream.h"
+
+#define EOF (-1)
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached(void);
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untracked_stream(FILE *fp) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage and not modeled by checker.
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ }
+}
+
+void fgetc_props_taint(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ int c = fgetc(fp); // c is tainted.
+ if (c != EOF) {
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void fread_props_taint(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ char buffer[10];
+ int c = fread(buffer, 1, 10, fp); // c is tainted.
+ if (c != 10) {
+ // If the read failed, then the number of bytes successfully read should be tainted.
+ clang_analyzer_isTainted(c); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte1(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ char c;
+ if (1 == fread(&c, 1, 1, fp)) {
+ char p = c; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = c; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte2(char *buffer) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ if (1 == fread(buffer, 1, 1, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void read_one_byte3(char *buffer) {
+ buffer[1] = 10;
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ // buffer[1] is not mutated by fread and remains not tainted.
+ fread(buffer, 1, 1, fp);
+ char p = buffer[1];
+ clang_analyzer_isTainted(p); // expected-warning{{NO}}
+ clang_analyzer_dump(buffer[1]); // expected-warning{{10 S32b}}
+ fclose(fp);
+ }
+}
+
+void read_many_bytes(char *buffer) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ if (42 == fread(buffer, 1, 42, fp)) {
+ char p = buffer[0]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ } else {
+ char p = buffer[0]; // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(p); // expected-warning{{YES}}
+ }
+ fclose(fp);
+ }
+}
+
+void random_access_write1(int index) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ long c[4];
+ int success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+ switch (index) {
+ case 0:
+ // c[0] is not mutated by fread.
+ if (success) {
+ char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+ } else {
+ char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+ }
+ break;
+
+ case 1:
+ if (success) {
+ // Unknown value but not garbage.
+ clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+ clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+ } else {
+ // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+ clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+ }
+ break;
+
+ case 2:
+ if (success) {
+ long p = c[2]; // Unknown value but not garbage.
+ // FIXME: Taint analysis only marks the first byte of a memory region. See getPointeeOf in GenericTaintChecker.cpp.
+ clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+ clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+ } else {
+ // Possibly indeterminate value but not modeled.
+ clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: See above.
+ clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+ }
+ break;
+
+ case 3:
+ // c[3] is not mutated by fread.
+ if (success) {
+ long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+ } else {
+ long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ break;
+ }
+
+ fclose(fp);
+ }
+}
+
+void random_access_write2(int b) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ int buffer[10];
+ int *ptr = buffer + 2;
+ if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+ if (b) {
+ int p = buffer[1]; // Unknown value but not garbage.
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ } else {
+ int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+void random_access_write_symbolic_count(size_t count) {
+ // Cover a case that used to crash (symbolic count).
+ if (count > 2)
+ return;
+
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ long c[4];
+ fread(c + 1, sizeof(long), count, fp);
+
+ // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value, the checker doesn't know that.
+ long p = c[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ p = c[3];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ p = c[1];
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+ fclose(fp);
+ }
+}
+
+void dynamic_random_access_write(int startIndex) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ long buffer[10];
+ // Cannot reason about index.
+ size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+ if (5 == res) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else if (res == 4) {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 1];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 2];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 3];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 4];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[startIndex + 5];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ long p = buffer[startIndex];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ p = buffer[0];
+ clang_analyzer_isTainted(p); // expected-warning {{NO}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ }
+ fclose(fp);
+ }
+}
+
+struct S {
+ int a;
+ long b;
+};
+
+void compound_write1(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ struct S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ } else {
+ long p = s.b;
+ clang_analyzer_isTainted(p); // expected-warning {{YES}}
+ clang_analyzer_dump(p); // expected-warning {{conj_}}
+ }
+ fclose(fp);
+ }
+}
+
+void compound_write2(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ struct S s; // s.a is not touched by fread.
+ if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+ long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
+ } else {
+ long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+void var_write(void) {
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ int a, b; // 'a' is not touched by fread.
+ if (1 == fread(&b, sizeof(b), 1, fp)) {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ } else {
+ long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+ }
+ fclose(fp);
+ }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite in StreamChecker.cpp.
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void test_large_read(void) {
+ int buffer[PastMaxComplexity + 1];
+ buffer[PastMaxComplexity] = 42;
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // Unreachable.
+ }
+ if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+ if (buffer[PastMaxComplexity] != 42) {
+ clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+ }
+ }
+ fclose(fp);
+ }
+}
+
+void test_small_read(void) {
+ int buffer[10];
+ buffer[5] = 42;
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+ if (1 == fread(buffer, sizeof(int), 5, fp)) {
+ clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+ }
+ fclose(fp);
+ }
+}
+
+void test_partial_elements_read(void) {
+ clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+ int buffer[100];
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ // 3*5: 15 bytes read; which is not exactly 4 integers, thus we invalidate the whole buffer.
+ if (5 == fread(buffer + 1, 3, 5, fp)) {
+ clang_analyzer_dump(buffer[0]); // expected-warning{{derived_}}
+ } else {
+ clang_analyzer_dump(buffer[0]); // expected-warning{{derived_}}
+ }
+ fclose(fp);
+ }
+}
+
+void test_whole_elements_read(void) {
+ clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+ int buffer[100];
+ buffer[0] = 1;
+ buffer[15] = 2;
+ buffer[16] = 3;
+ FILE *fp = fopen("/home/test", "rb+");
+ if (fp) {
+ // 3*20: 60 bytes read; which is basically 15 integers.
+ if (20 == fread(buffer + 1, 3, 20, fp)) {
+ clang_analyzer_dump(buffer[0]); // expected-warning{{1 S32b}}
+ clang_analyzer_dump(buffer[20]); // expected-warning{{conj_}}
+ clang_analyzer_dump(buffer[21]); // expected-warning{{1st function call argument is an uninitialized value}}
----------------
steakhal wrote:
Nice catch. I made a bug when calling `escapeByStartIndexAndCount`. I should have passed `NumElementsRead` instead of `CountVal`. Thanks for noticing this!
https://github.com/llvm/llvm-project/pull/93408
More information about the cfe-commits
mailing list