[clang] [analyzer] Refine invalidation caused by `fread` (PR #93408)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 6 00:55:49 PDT 2024


================
@@ -0,0 +1,412 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -triple x86_64-linux-gnu  \
+// RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection
+
+#include "Inputs/system-header-simulator-for-simple-stream.h"
+
+#define EOF (-1)
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached(void);
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// "fp" is a plain parameter here, so there is no specific modelling of "fread" and this function should produce no diagnostics.
+void untracked_stream(FILE *fp) {
+  char c; // Left uninitialized; only fread may write to it.
+  if (1 == fread(&c, 1, 1, fp)) {
+    char p = c; // Unknown value but not garbage and not modeled by checker.
+  } else {
+    char p = c; // Possibly indeterminate value but not modeled by checker.
+  }
+}
+
+void fgetc_props_taint(void) { // The value returned by fgetc on an fopen'ed stream must be reported as tainted.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    int c = fgetc(fp); // c is tainted.
+    if (c != EOF) { // Only the path where a character was actually read is checked.
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void fread_props_taint(void) { // The item count returned by fread on an fopen'ed stream must be reported as tainted.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    char buffer[10];
+    int c = fread(buffer, 1, 10, fp); // c is tainted.
+    if (c != 10) { // Short read: fewer than the 10 requested one-byte items were read.
+      // Even on a short read, the number of items successfully read should be tainted.
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte1(void) { // A byte read into a local variable must be tainted on both the success and the failure path.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    char c; // Left uninitialized; only fread may write to it.
+    if (1 == fread(&c, 1, 1, fp)) {
+      char p = c; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = c; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte2(char *buffer) { // Same check as read_one_byte1, but the byte is read into a caller-provided buffer.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    if (1 == fread(buffer, 1, 1, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte3(char *buffer) { // Reading a single byte must leave the neighbouring element buffer[1] untouched.
+  buffer[1] = 10; // Known value stored just past the single byte that fread is asked to write.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // buffer[1] is not mutated by fread and remains not tainted.
+    fread(buffer, 1, 1, fp);
+    char p = buffer[1];
+    clang_analyzer_isTainted(p); // expected-warning{{NO}}
+    clang_analyzer_dump(buffer[1]); // expected-warning{{10 S32b}}
+    fclose(fp);
+  }
+}
+
+void read_many_bytes(char *buffer) { // A 42-byte read must taint buffer[0] on both the full-read and the short-read path.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    if (42 == fread(buffer, 1, 42, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write1(int index) { // fread targets c[1] and c[2] only; each case below inspects one element of c.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long c[4]; // Left uninitialized; only fread may write to it.
+    int success = 2 == fread(c + 1, sizeof(long), 2, fp); // Nonzero only if both requested elements were read.
+
+    switch (index) {
+    case 0:
+      // c[0] is not mutated by fread.
+      if (success) {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} c[0] was left untouched.
+      } else {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} c[0] was left untouched.
+      }
+      break;
+
+    case 1: // c[1] is the first element fread writes to.
+      if (success) {
+        // Unknown value but not garbage.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 2: // c[2] is the second and last element fread writes to.
+      if (success) {
+        long p = c[2]; // Unknown value but not garbage.
+        // FIXME: Taint analysis only marks the first byte of a memory region. See getPointeeOf in GenericTaintChecker.cpp.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: See above.
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 3:
+      // c[3] is not mutated by fread.
+      if (success) {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      } else {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+      break;
+    }
+
+    fclose(fp);
+  }
+}
+
+void random_access_write2(int b) { // fread starts writing at buffer[1], so buffer[0] must stay uninitialized.
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    int buffer[10]; // Left uninitialized; only fread may write to it.
+    int *ptr = buffer + 2; // The destination is derived through pointer arithmetic: ptr - 1 is &buffer[1].
+    if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+      if (b) {
+        int p = buffer[1]; // Unknown value but not garbage.
+        clang_analyzer_isTainted(p); // expected-warning {{YES}}
+        clang_analyzer_dump(p); // expected-warning {{conj_}}
+      } else {
+        int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+    } else {
+      int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write_symbolic_count(size_t count) {
+  // Cover a case that used to crash (symbolic count).
+  if (count > 2) // Past this point count is symbolic but known to be at most 2.
+    return;
+
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long c[4]; // Left uninitialized; only fread may write to it.
+    fread(c + 1, sizeof(long), count, fp); // With count <= 2 this can reach c[1] and c[2] at most.
+
+    // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value, the checker doesn't know that.
+    long p = c[0];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[3];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[1];
+    clang_analyzer_isTainted(p); // expected-warning {{YES}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    fclose(fp);
+  }
+}
+
+void dynamic_random_access_write(int startIndex) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long buffer[10];
+    // Cannot reason about index.
+    size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+    if (5 == res) {
----------------
steakhal wrote:

I'm just upstreaming this hunk. I wasn't here when it was written.
My guess is that these test the fact that we write up to 5 `long` values into the buffer, starting at a symbolic index.
The engine should deduce from this that `buffer[startIndex + 5]` definitely holds an uninitialized value. However, as currently implemented, it can't, because I only handle the case when the index is concrete.

W.r.t. why we have different branches depending on the return value of `fread`, my guess would be that if we modeled that it only read 4 elements, then `buffer[startIndex + 4]` and subsequent elements should be uninitialized, etc.

I'll rework the test case to convey this interpretation.

https://github.com/llvm/llvm-project/pull/93408


More information about the cfe-commits mailing list