[clang] [analyzer] Refine invalidation caused by `fread` (PR #93408)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 7 05:32:48 PDT 2024


================
@@ -0,0 +1,443 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -triple x86_64-linux-gnu  \
+// RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection
+
+#include "Inputs/system-header-simulator-for-simple-stream.h"
+
+#define EOF (-1)
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached(void);
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untracked_stream(FILE *fp) {
+  char c;
+  if (1 == fread(&c, 1, 1, fp)) {
+    char p = c; // Unknown value but not garbage and not modeled by checker.
+  } else {
+    char p = c; // Possibly indeterminate value but not modeled by checker.
+  }
+}
+
+void fgetc_props_taint(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    int c = fgetc(fp); // c is tainted.
+    if (c != EOF) {
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void fread_props_taint(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    char buffer[10];
+    int c = fread(buffer, 1, 10, fp); // c is tainted.
+    if (c != 10) {
+      // If the read failed, then the number of bytes successfully read should be tainted.
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte1(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    char c;
+    if (1 == fread(&c, 1, 1, fp)) {
+      char p = c; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = c; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte2(char *buffer) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    if (1 == fread(buffer, 1, 1, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte3(char *buffer) {
+  buffer[1] = 10;
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // buffer[1] is not mutated by fread and remains not tainted.
+    fread(buffer, 1, 1, fp);
+    char p = buffer[1];
+    clang_analyzer_isTainted(p); // expected-warning{{NO}}
+    clang_analyzer_dump(buffer[1]); // expected-warning{{10 S32b}}
+    fclose(fp);
+  }
+}
+
+void read_many_bytes(char *buffer) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    if (42 == fread(buffer, 1, 42, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_read1(int index) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long c[4];
+    int success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+    switch (index) {
+    case 0:
+      // c[0] is not mutated by fread.
+      if (success) {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+      } else {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+      }
+      break;
+
+    case 1:
+      if (success) {
+        // Unknown value but not garbage.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 2:
+      if (success) {
+        long p = c[2]; // Unknown value but not garbage.
+        // FIXME: Taint analysis only marks the first byte of a memory region. See getPointeeOf in GenericTaintChecker.cpp.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: See above.
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 3:
+      // c[3] is not mutated by fread.
+      if (success) {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      } else {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+      break;
+    }
+
+    fclose(fp);
+  }
+}
+
+void random_access_read2(int b) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    int buffer[10];
+    int *ptr = buffer + 2;
+    if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+      if (b) {
+        int p = buffer[1]; // Unknown value but not garbage.
+        clang_analyzer_isTainted(p); // expected-warning {{YES}}
+        clang_analyzer_dump(p); // expected-warning {{conj_}}
+      } else {
+        int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+    } else {
+      int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_read_symbolic_count(size_t count) {
+  // Cover a case that used to crash (symbolic count).
+  if (count > 2)
+    return;
+
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long c[4];
+    fread(c + 1, sizeof(long), count, fp);
+
+    // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value, the checker doesn't know that.
+    long p = c[0];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[3];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[1];
+    clang_analyzer_isTainted(p); // expected-warning {{YES}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    fclose(fp);
+  }
+}
+
+void dynamic_random_access_read(int startIndex) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    long buffer[10];
+    // Cannot reason about index.
+    size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+    long *p = &buffer[startIndex];
+    long v = 0;
+
+    // If all 5 elements were successfully read, then all 5 elements should be tainted and considered initialized.
+    if (5 == res) {
+      // FIXME: These should be tainted.
+      clang_analyzer_isTainted((v = p[0])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[1])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[2])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[3])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[4])); // expected-warning {{NO}}
+      clang_analyzer_dump((v = p[0])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[1])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[2])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[3])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[4])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[5])); // expected-warning {{conj_}} FIXME: This should raise an uninit read.
+    } else if (res == 4) {
+      // If only the first 4 elements were successfully read,
+      // then only the first 4 elements should be tainted and considered initialized.
+      // FIXME: These should be tainted.
+      clang_analyzer_isTainted((v = p[0])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[1])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[2])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[3])); // expected-warning {{NO}}
+      clang_analyzer_dump((v = p[0])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[1])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[2])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[3])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[4])); // expected-warning {{conj_}} FIXME: This should raise an uninit read.
+    } else {
+      // Neither 5, or 4 elements were successfully read, so we must have read from 0 up to 3 elements.
+      // FIXME: These should be tainted.
+      clang_analyzer_isTainted((v = p[0])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[1])); // expected-warning {{NO}}
+      clang_analyzer_isTainted((v = p[2])); // expected-warning {{NO}}
+      clang_analyzer_dump((v = p[0])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[1])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[2])); // expected-warning {{conj_}} ok
+      clang_analyzer_dump((v = p[3])); // expected-warning {{conj_}} FIXME: This should raise an uninit read.
+    }
+    fclose(fp);
+  }
+}
+
+struct S {
+  int a;
+  long b;
+};
+
+void compound_read1(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    struct S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+void compound_read2(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    struct S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
+    } else {
+      long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+void var_read(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    int a, b; // 'a' is not touched by fread.
+    if (1 == fread(&b, sizeof(b), 1, fp)) {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    } else {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite in StreamChecker.cpp.
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void test_large_read(void) {
+  int buffer[PastMaxComplexity + 1];
+  buffer[PastMaxComplexity] = 42;
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    if (buffer[PastMaxComplexity] != 42) {
+      clang_analyzer_warnIfReached(); // Unreachable.
+    }
+    if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+      if (buffer[PastMaxComplexity] != 42) {
+        clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+      }
+    }
+    fclose(fp);
+  }
+}
+
+void test_small_read(void) {
+  int buffer[10];
+  buffer[5] = 42;
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    if (1 == fread(buffer, sizeof(int), 5, fp)) {
+      clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    }
+    fclose(fp);
+  }
+}
+
+void test_partial_elements_read(void) {
+  clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+  int buffer[100];
+  buffer[0] = 1;
+  buffer[1] = 2;
+  buffer[2] = 3;
+  buffer[3] = 4;
+  buffer[4] = 5;
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // 3*5: 15 bytes read; which is not exactly 4 integers, thus we invalidate the whole buffer.
----------------
steakhal wrote:

I can't dump that because one byte (the last one) of that 'int' object is left uninitialized by the fread and reading uninit values will kill the execution path, making the rest of the checks unreachable.

This is caused by the current implementation which pretends that we only read into the first 4 elements of the buffer.

https://github.com/llvm/llvm-project/pull/93408


More information about the cfe-commits mailing list