[clang] [analyzer] Refine invalidation caused by `fread` (PR #93408)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 3 07:07:48 PDT 2024


https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/93408

>From f9e841ddaa865d529c806b2d115d5ddbc7109243 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sun, 26 May 2024 11:40:01 +0200
Subject: [PATCH 1/8] [analyzer] Refine invalidation caused by `fread`

This change enables more accurate modeling of the write effects of `fread`.
In particular, instead of invalidating the whole buffer, we try, on a
best-effort basis, to invalidate only the elements of the buffer that are
actually accessed. This preserves the previous values of the unaffected slots.
As a result, we can diagnose more uninitialized buffer uses, for example.

Currently, this refined invalidation triggers for `fread` only if both
the `count` parameter and the buffer pointer's index component
are concrete or perfectly-constrained symbols.
Additionally, if the `fread` would read more than 64 elements, the whole
buffer is invalidated as before. This is to have safeguards against
performance issues.

Refer to the comments of the assertions in the following example to see
the changes in the diagnostics:

```c++
void demo() {
  FILE *fp = fopen("/home/test", "rb+");
  if (!fp) return;
  int buffer[10]; // uninitialized
  int read_items = fread(buffer+1, sizeof(int), 5, fp);
  if (5 == read_items) {
    int v1 = buffer[1]; // Unknown value but not garbage.
    clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch.
    clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)(v1 + v0);
  } else {
    // If 'fread' had an error.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)v0;
  }
  fclose(fp);
}
```

[CPP-3247](https://sonarsource.atlassian.net/browse/CPP-3247)

Patch by Marco Borgeaud (marco-antognini-sonarsource)
---
 .../StaticAnalyzer/Checkers/StreamChecker.cpp |  88 ++++-
 clang/test/Analysis/fread.cpp                 | 328 ++++++++++++++++++
 2 files changed, 405 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Analysis/fread.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index d4e020f7a72a0..7b42c4f72b322 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -717,18 +717,71 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N,
   return nullptr;
 }
 
+/// Invalidate only the requested elements instead of the whole buffer.
+/// This is basically a refinement of the more generic 'escapeArgs' or
+/// the plain old 'invalidateRegions'.
+/// This only works if the \p StartIndex and \p Count are concrete or
+/// perfectly-constrained.
+static ProgramStateRef
+escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
+                           const CallEvent &Call, const MemRegion *Buffer,
+                           QualType ElemType, SVal StartIndex, SVal Count) {
+  if (!llvm::isa_and_nonnull<SubRegion>(Buffer))
+    return State;
+
+  auto UnboxAsInt = [&C, &State](SVal V) -> std::optional<int64_t> {
+    auto &SVB = C.getSValBuilder();
+    if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
+      return Int->tryExtValue();
+    return std::nullopt;
+  };
+
+  auto StartIndexVal = UnboxAsInt(StartIndex);
+  auto CountVal = UnboxAsInt(Count);
+
+  // FIXME: Maybe we could make this more generic, and expose this by the
+  // 'invalidateRegions' API. After doing so, it might make sense to make this
+  // limit configurable.
+  constexpr int MaxInvalidatedElementsLimit = 64;
+  if (!StartIndexVal || !CountVal || *CountVal > MaxInvalidatedElementsLimit) {
+    return State->invalidateRegions({loc::MemRegionVal{Buffer}},
+                                    Call.getOriginExpr(), C.blockCount(),
+                                    C.getLocationContext(),
+                                    /*CausesPointerEscape=*/false);
+  }
+
+  constexpr auto DoNotInvalidateSuperRegion =
+      RegionAndSymbolInvalidationTraits::InvalidationKinds::
+          TK_DoNotInvalidateSuperRegion;
+
+  auto &RegionManager = Buffer->getMemRegionManager();
+  SmallVector<SVal> EscapingVals;
+  EscapingVals.reserve(*CountVal);
+
+  RegionAndSymbolInvalidationTraits ITraits;
+  for (auto Idx : llvm::seq(*StartIndexVal, *StartIndexVal + *CountVal)) {
+    NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
+    const auto *Element = RegionManager.getElementRegion(
+        ElemType, Index, cast<SubRegion>(Buffer), C.getASTContext());
+    EscapingVals.push_back(loc::MemRegionVal(Element));
+    ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
+  }
+  return State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+                                  C.blockCount(), C.getLocationContext(),
+                                  /*CausesPointerEscape=*/false,
+                                  /*InvalidatedSymbols=*/nullptr, &Call,
+                                  &ITraits);
+}
+
 static ProgramStateRef escapeArgs(ProgramStateRef State, CheckerContext &C,
                                   const CallEvent &Call,
                                   ArrayRef<unsigned int> EscapingArgs) {
-  const auto *CE = Call.getOriginExpr();
-
-  SmallVector<SVal> EscapingVals;
-  EscapingVals.reserve(EscapingArgs.size());
-  for (auto EscArgIdx : EscapingArgs)
-    EscapingVals.push_back(Call.getArgSVal(EscArgIdx));
-  State = State->invalidateRegions(EscapingVals, CE, C.blockCount(),
-                                   C.getLocationContext(),
-                                   /*CausesPointerEscape=*/false);
+  auto GetArgSVal = [&Call](int Idx) { return Call.getArgSVal(Idx); };
+  auto EscapingVals = to_vector(map_range(EscapingArgs, GetArgSVal));
+  State = State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+                                   C.blockCount(), C.getLocationContext(),
+                                   /*CausesPointerEscape=*/false,
+                                   /*InvalidatedSymbols=*/nullptr);
   return State;
 }
 
@@ -937,8 +990,21 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
 
   // At read, invalidate the buffer in any case of error or success,
   // except if EOF was already present.
-  if (IsFread && !E.isStreamEof())
-    State = escapeArgs(State, C, Call, {0});
+  if (IsFread && !E.isStreamEof()) {
+    // Try to invalidate the individual elements.
+    if (const auto *BufferFirstElem =
+            dyn_cast_or_null<ElementRegion>(Call.getArgSVal(0).getAsRegion())) {
+      const MemRegion *Buffer = BufferFirstElem->getSuperRegion();
+      QualType ElemTy = BufferFirstElem->getElementType();
+      SVal FirstAccessedItem = BufferFirstElem->getIndex();
+      SVal ItemCount = Call.getArgSVal(2);
+      State = escapeByStartIndexAndCount(State, C, Call, Buffer, ElemTy,
+                                         FirstAccessedItem, ItemCount);
+    } else {
+      // Otherwise just fall back to invalidating the whole buffer.
+      State = escapeArgs(State, C, Call, {0});
+    }
+  }
 
   // Generate a transition for the success state.
   // If we know the state to be FEOF at fread, do not add a success state.
diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.cpp
new file mode 100644
index 0000000000000..2bf9baefe1395
--- /dev/null
+++ b/clang/test/Analysis/fread.cpp
@@ -0,0 +1,328 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection
+
+#define EOF (-1)
+
+extern "C" {
+typedef __typeof(sizeof(int)) size_t;
+typedef struct _FILE FILE;
+
+FILE *fopen(const char *filename, const char *mode);
+int fclose(FILE *stream);
+size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
+int fgetc(FILE *stream);
+void *malloc(size_t size);
+}
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached();
+
+// A stream is only tracked by StreamChecker if it results from a call to "fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untracked_stream(FILE *fp) {
+  char c;
+  if (1 == fread(&c, 1, 1, fp)) {
+    char p = c; // Unknown value but not garbage and not modeled by checker.
+  } else {
+    char p = c; // Possibly indeterminate value but not modeled by checker.
+  }
+}
+
+void fgetc_props_taint() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int c = fgetc(fp); // c is tainted.
+    if (c != EOF) {
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void fread_props_taint() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    char buffer[10];
+    int c = fread(buffer, 1, 10, fp); // c is tainted.
+    if (c != 10) {
+      // If the read failed, then the number of bytes successfully read should be tainted.
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte1() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    char c;
+    if (1 == fread(&c, 1, 1, fp)) {
+      char p = c; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = c; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte2(char *buffer) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (1 == fread(buffer, 1, 1, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte3(char *buffer) {
+  buffer[1] = 10;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    // buffer[1] is not mutated by fread and remains not tainted.
+    fread(buffer, 1, 1, fp);
+    char p = buffer[1];
+    clang_analyzer_isTainted(p); // expected-warning{{NO}}
+    clang_analyzer_dump(buffer[1]); // expected-warning{{derived_}} FIXME This should be 10.
+    fclose(fp);
+  }
+}
+
+void read_many_bytes(char *buffer) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (42 == fread(buffer, 1, 42, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write1(int index) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long c[4];
+    bool success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+    switch (index) {
+    case 0:
+      // c[0] is not mutated by fread.
+      if (success) {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+      } else {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or undefined}} We kept the first byte intact.
+      }
+      break;
+
+    case 1:
+      if (success) {
+        // Unknown value but not garbage.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 2:
+      if (success) {
+        long p = c[2]; // Unknown value but not garbage.
+        // FIXME: Taint analysis only marks the first byte of a memory region. See getPointeeOf in GenericTaintChecker.cpp.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: See above.
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 3:
+      // c[3] is not mutated by fread.
+      if (success) {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      } else {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+      break;
+    }
+
+    fclose(fp);
+  }
+}
+
+void random_access_write2(bool b) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int buffer[10];
+    int *ptr = buffer + 2;
+    if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+      if (b) {
+        int p = buffer[1]; // Unknown value but not garbage.
+        clang_analyzer_isTainted(p); // expected-warning {{YES}}
+        clang_analyzer_dump(p); // expected-warning {{conj_}}
+      } else {
+        int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+      }
+    } else {
+      int p = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write_symbolic_count(size_t count) {
+  // Cover a case that used to crash (symbolic count).
+  if (count > 2)
+    return;
+
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long c[4];
+    fread(c + 1, sizeof(long), count, fp);
+
+    // c[0] and c[3] are never mutated by fread, but because "count" is a symbolic value, the checker doesn't know that.
+    long p = c[0];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[3];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[1];
+    clang_analyzer_isTainted(p); // expected-warning {{YES}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    fclose(fp);
+  }
+}
+
+void dynamic_random_access_write(int startIndex) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long buffer[10];
+    // Cannot reason about index.
+    size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+    if (5 == res) {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else if (res == 4) {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 1];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 2];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 3];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 4];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 5];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[0];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[0];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+struct S {
+  int a;
+  long b;
+};
+
+void comopund_write1() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+void comopund_write2() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.a; // FIXME: This should raise an uninitialized read.
+      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = s.a; // FIXME: This should raise an uninitialized read.
+      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+void var_write() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int a, b; // 'a' is not touched by fread.
+    if (1 == fread(&b, sizeof(b), 1, fp)) {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    } else {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite in StreamChecker.cpp.
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void test_large_read() {
+  int buffer[PastMaxComplexity + 1];
+  buffer[PastMaxComplexity] = 42;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (buffer[PastMaxComplexity] != 42) {
+      clang_analyzer_warnIfReached(); // Unreachable.
+    }
+    if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+      if (buffer[PastMaxComplexity] != 42) {
+        clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+      }
+    }
+    fclose(fp);
+  }
+}
+
+void test_small_read() {
+  int buffer[10];
+  buffer[5] = 42;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    if (1 == fread(buffer, sizeof(int), 5, fp)) {
+      clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    }
+    fclose(fp);
+  }
+}

>From c46aa42fd256008f710c5c20b8cf8a7c7680e2ad Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 12:58:34 +0200
Subject: [PATCH 2/8] NFC Prefer dyn_cast over isa&cast

https://github.com/llvm/llvm-project/pull/93408#discussion_r1615909522
---
 clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 7b42c4f72b322..02603243b06fa 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -726,7 +726,8 @@ static ProgramStateRef
 escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
                            const CallEvent &Call, const MemRegion *Buffer,
                            QualType ElemType, SVal StartIndex, SVal Count) {
-  if (!llvm::isa_and_nonnull<SubRegion>(Buffer))
+  const auto *BufferAsRegion = dyn_cast_or_null<SubRegion>(Buffer);
+  if (!BufferAsRegion)
     return State;
 
   auto UnboxAsInt = [&C, &State](SVal V) -> std::optional<int64_t> {
@@ -744,7 +745,7 @@ escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
   // limit configurable.
   constexpr int MaxInvalidatedElementsLimit = 64;
   if (!StartIndexVal || !CountVal || *CountVal > MaxInvalidatedElementsLimit) {
-    return State->invalidateRegions({loc::MemRegionVal{Buffer}},
+    return State->invalidateRegions({loc::MemRegionVal{BufferAsRegion}},
                                     Call.getOriginExpr(), C.blockCount(),
                                     C.getLocationContext(),
                                     /*CausesPointerEscape=*/false);
@@ -754,7 +755,7 @@ escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
       RegionAndSymbolInvalidationTraits::InvalidationKinds::
           TK_DoNotInvalidateSuperRegion;
 
-  auto &RegionManager = Buffer->getMemRegionManager();
+  auto &RegionManager = BufferAsRegion->getMemRegionManager();
   SmallVector<SVal> EscapingVals;
   EscapingVals.reserve(*CountVal);
 
@@ -762,7 +763,7 @@ escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
   for (auto Idx : llvm::seq(*StartIndexVal, *StartIndexVal + *CountVal)) {
     NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
     const auto *Element = RegionManager.getElementRegion(
-        ElemType, Index, cast<SubRegion>(Buffer), C.getASTContext());
+        ElemType, Index, BufferAsRegion, C.getASTContext());
     EscapingVals.push_back(loc::MemRegionVal(Element));
     ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
   }

>From 79ea47ada37c3a7462c9de45d6fc55de80330340 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 12:59:36 +0200
Subject: [PATCH 3/8] NFC Spell out std::optional

https://github.com/llvm/llvm-project/pull/93408#discussion_r1615913100
---
 clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 02603243b06fa..1f4f76fa89979 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -737,8 +737,8 @@ escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
     return std::nullopt;
   };
 
-  auto StartIndexVal = UnboxAsInt(StartIndex);
-  auto CountVal = UnboxAsInt(Count);
+  std::optional<int64_t> StartIndexVal = UnboxAsInt(StartIndex);
+  std::optional<int64_t> CountVal = UnboxAsInt(Count);
 
   // FIXME: Maybe we could make this more generic, and expose this by the
   // 'invalidateRegions' API. After doing so, it might make sense to make this

>From 0ab63f1a181428a4b23bfdfe32a440f708c78d9e Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 13:00:21 +0200
Subject: [PATCH 4/8] NFC Fix test name typos

https://github.com/llvm/llvm-project/pull/93408#discussion_r1615919113
https://github.com/llvm/llvm-project/pull/93408#discussion_r1615919288
---
 clang/test/Analysis/fread.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.cpp
index 2bf9baefe1395..66488b2a0d780 100644
--- a/clang/test/Analysis/fread.cpp
+++ b/clang/test/Analysis/fread.cpp
@@ -251,7 +251,7 @@ struct S {
   long b;
 };
 
-void comopund_write1() {
+void compound_write1() {
   if (FILE *fp = fopen("/home/test", "rb+")) {
     S s; // s.a is not touched by fread.
     if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
@@ -267,7 +267,7 @@ void comopund_write1() {
   }
 }
 
-void comopund_write2() {
+void compound_write2() {
   if (FILE *fp = fopen("/home/test", "rb+")) {
     S s; // s.a is not touched by fread.
     if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {

>From f9142be372af24ea3ea4b2ebbdd1ce18d37b5d03 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 13:10:56 +0200
Subject: [PATCH 5/8] NFC Move the test file to C

https://github.com/llvm/llvm-project/pull/93408#discussion_r1615930654
---
 ...ystem-header-simulator-for-simple-stream.h |  3 +
 clang/test/Analysis/{fread.cpp => fread.c}    | 84 ++++++++++---------
 2 files changed, 48 insertions(+), 39 deletions(-)
 rename clang/test/Analysis/{fread.cpp => fread.c} (87%)

diff --git a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
index c26d358214912..47adf8e23a117 100644
--- a/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
+++ b/clang/test/Analysis/Inputs/system-header-simulator-for-simple-stream.h
@@ -5,12 +5,15 @@
 // suppressed.
 #pragma clang system_header
 
+typedef __typeof(sizeof(int)) size_t;
 typedef struct _FILE {
   unsigned char *_p;
 } FILE;
 FILE *fopen(const char *restrict, const char *restrict) __asm("_" "fopen" );
 int fputc(int, FILE *);
 int fputs(const char *restrict, FILE *restrict) __asm("_" "fputs" );
+size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
+int fgetc(FILE *stream);
 int fclose(FILE *);
 void exit(int);
 
diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.c
similarity index 87%
rename from clang/test/Analysis/fread.cpp
rename to clang/test/Analysis/fread.c
index 66488b2a0d780..1ed27a757bd1e 100644
--- a/clang/test/Analysis/fread.cpp
+++ b/clang/test/Analysis/fread.c
@@ -2,22 +2,13 @@
 // RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
 // RUN:   -analyzer-checker=debug.ExprInspection
 
-#define EOF (-1)
-
-extern "C" {
-typedef __typeof(sizeof(int)) size_t;
-typedef struct _FILE FILE;
+#include "Inputs/system-header-simulator-for-simple-stream.h"
 
-FILE *fopen(const char *filename, const char *mode);
-int fclose(FILE *stream);
-size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
-int fgetc(FILE *stream);
-void *malloc(size_t size);
-}
+#define EOF (-1)
 
 void clang_analyzer_dump(int);
 void clang_analyzer_isTainted(int);
-void clang_analyzer_warnIfReached();
+void clang_analyzer_warnIfReached(void);
 
 // A stream is only tracked by StreamChecker if it results from a call to "fopen".
 // Otherwise, there is no specific modelling of "fread".
@@ -30,8 +21,9 @@ void untracked_stream(FILE *fp) {
   }
 }
 
-void fgetc_props_taint() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+void fgetc_props_taint(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     int c = fgetc(fp); // c is tainted.
     if (c != EOF) {
       clang_analyzer_isTainted(c); // expected-warning{{YES}}
@@ -40,8 +32,9 @@ void fgetc_props_taint() {
   }
 }
 
-void fread_props_taint() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+void fread_props_taint(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     char buffer[10];
     int c = fread(buffer, 1, 10, fp); // c is tainted.
     if (c != 10) {
@@ -52,8 +45,9 @@ void fread_props_taint() {
   }
 }
 
-void read_one_byte1() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+void read_one_byte1(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     char c;
     if (1 == fread(&c, 1, 1, fp)) {
       char p = c; // Unknown value but not garbage.
@@ -67,7 +61,8 @@ void read_one_byte1() {
 }
 
 void read_one_byte2(char *buffer) {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     if (1 == fread(buffer, 1, 1, fp)) {
       char p = buffer[0]; // Unknown value but not garbage.
       clang_analyzer_isTainted(p); // expected-warning{{YES}}
@@ -81,7 +76,8 @@ void read_one_byte2(char *buffer) {
 
 void read_one_byte3(char *buffer) {
   buffer[1] = 10;
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     // buffer[1] is not mutated by fread and remains not tainted.
     fread(buffer, 1, 1, fp);
     char p = buffer[1];
@@ -92,7 +88,8 @@ void read_one_byte3(char *buffer) {
 }
 
 void read_many_bytes(char *buffer) {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     if (42 == fread(buffer, 1, 42, fp)) {
       char p = buffer[0]; // Unknown value but not garbage.
       clang_analyzer_isTainted(p); // expected-warning{{YES}}
@@ -105,9 +102,10 @@ void read_many_bytes(char *buffer) {
 }
 
 void random_access_write1(int index) {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     long c[4];
-    bool success = 2 == fread(c + 1, sizeof(long), 2, fp);
+    int success = 2 == fread(c + 1, sizeof(long), 2, fp);
 
     switch (index) {
     case 0:
@@ -158,8 +156,9 @@ void random_access_write1(int index) {
   }
 }
 
-void random_access_write2(bool b) {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+void random_access_write2(int b) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     int buffer[10];
     int *ptr = buffer + 2;
     if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
@@ -182,7 +181,8 @@ void random_access_write_symbolic_count(size_t count) {
   if (count > 2)
     return;
 
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     long c[4];
     fread(c + 1, sizeof(long), count, fp);
 
@@ -204,7 +204,8 @@ void random_access_write_symbolic_count(size_t count) {
 }
 
 void dynamic_random_access_write(int startIndex) {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     long buffer[10];
     // Cannot reason about index.
     size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
@@ -251,9 +252,10 @@ struct S {
   long b;
 };
 
-void compound_write1() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
-    S s; // s.a is not touched by fread.
+void compound_write1(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    struct S s; // s.a is not touched by fread.
     if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
       long p = s.b;
       clang_analyzer_isTainted(p); // expected-warning {{YES}}
@@ -267,9 +269,10 @@ void compound_write1() {
   }
 }
 
-void compound_write2() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
-    S s; // s.a is not touched by fread.
+void compound_write2(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    struct S s; // s.a is not touched by fread.
     if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
       long p = s.a; // FIXME: This should raise an uninitialized read.
       clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
@@ -283,8 +286,9 @@ void compound_write2() {
   }
 }
 
-void var_write() {
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+void var_write(void) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     int a, b; // 'a' is not touched by fread.
     if (1 == fread(&b, sizeof(b), 1, fp)) {
       long p = a; // expected-warning{{Assigned value is garbage or undefined}}
@@ -299,10 +303,11 @@ void var_write() {
 // Instead, the knowledge of the whole array is lost.
 #define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite in StreamChecker.cpp.
 #define PastMaxComplexity MaxInvalidatedElementRegion + 1
-void test_large_read() {
+void test_large_read(void) {
   int buffer[PastMaxComplexity + 1];
   buffer[PastMaxComplexity] = 42;
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     if (buffer[PastMaxComplexity] != 42) {
       clang_analyzer_warnIfReached(); // Unreachable.
     }
@@ -315,10 +320,11 @@ void test_large_read() {
   }
 }
 
-void test_small_read() {
+void test_small_read(void) {
   int buffer[10];
   buffer[5] = 42;
-  if (FILE *fp = fopen("/home/test", "rb+")) {
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
     clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
     if (1 == fread(buffer, sizeof(int), 5, fp)) {
       clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}

>From 034d1a1b7a1943f4050b9a147b53c9993a65f8c8 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 13:40:17 +0200
Subject: [PATCH 6/8] NFC Reuse existing variable

---
 clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 1f4f76fa89979..215834e89f9b9 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -998,9 +998,8 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
       const MemRegion *Buffer = BufferFirstElem->getSuperRegion();
       QualType ElemTy = BufferFirstElem->getElementType();
       SVal FirstAccessedItem = BufferFirstElem->getIndex();
-      SVal ItemCount = Call.getArgSVal(2);
       State = escapeByStartIndexAndCount(State, C, Call, Buffer, ElemTy,
-                                         FirstAccessedItem, ItemCount);
+                                         FirstAccessedItem, *NMembVal);
     } else {
       // Otherwise just fall back to invalidating the whole buffer.
       State = escapeArgs(State, C, Call, {0});

>From c8e5f543e6d519231cd47d3150edca2903f4a660 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 15:33:38 +0200
Subject: [PATCH 7/8] Handle TypedValueRegions and SymbolicRegions at offset 0

---
 .../Core/PathSensitive/MemRegion.h            |   2 +-
 .../StaticAnalyzer/Checkers/StreamChecker.cpp | 149 ++++++++++++------
 clang/lib/StaticAnalyzer/Core/MemRegion.cpp   |   6 +-
 clang/test/Analysis/fread.c                   |  10 +-
 4 files changed, 104 insertions(+), 63 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 151d3e57c1cb8..e53d60de6f35d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -1503,7 +1503,7 @@ class MemRegionManager {
   ///  associated element type, index, and super region.
   const ElementRegion *getElementRegion(QualType elementType, NonLoc Idx,
                                         const SubRegion *superRegion,
-                                        ASTContext &Ctx);
+                                        const ASTContext &Ctx);
 
   const ElementRegion *getElementRegionWithSuper(const ElementRegion *ER,
                                                  const SubRegion *superRegion) {
diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index 215834e89f9b9..dc685b0c56b07 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -717,61 +717,45 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N,
   return nullptr;
 }
 
+static std::optional<int64_t> getKnownValue(ProgramStateRef State, SVal V) {
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
+    return Int->tryExtValue();
+  return std::nullopt;
+}
+
 /// Invalidate only the requested elements instead of the whole buffer.
 /// This is basically a refinement of the more generic 'escapeArgs' or
 /// the plain old 'invalidateRegions'.
-/// This only works if the \p StartIndex and \p Count are concrete or
-/// perfectly-constrained.
 static ProgramStateRef
-escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
-                           const CallEvent &Call, const MemRegion *Buffer,
-                           QualType ElemType, SVal StartIndex, SVal Count) {
-  const auto *BufferAsRegion = dyn_cast_or_null<SubRegion>(Buffer);
-  if (!BufferAsRegion)
-    return State;
-
-  auto UnboxAsInt = [&C, &State](SVal V) -> std::optional<int64_t> {
-    auto &SVB = C.getSValBuilder();
-    if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
-      return Int->tryExtValue();
-    return std::nullopt;
-  };
-
-  std::optional<int64_t> StartIndexVal = UnboxAsInt(StartIndex);
-  std::optional<int64_t> CountVal = UnboxAsInt(Count);
-
-  // FIXME: Maybe we could make this more generic, and expose this by the
-  // 'invalidateRegions' API. After doing so, it might make sense to make this
-  // limit configurable.
-  constexpr int MaxInvalidatedElementsLimit = 64;
-  if (!StartIndexVal || !CountVal || *CountVal > MaxInvalidatedElementsLimit) {
-    return State->invalidateRegions({loc::MemRegionVal{BufferAsRegion}},
-                                    Call.getOriginExpr(), C.blockCount(),
-                                    C.getLocationContext(),
-                                    /*CausesPointerEscape=*/false);
-  }
-
+escapeByStartIndexAndCount(ProgramStateRef State, const CallEvent &Call,
+                           unsigned BlockCount, const SubRegion *Buffer,
+                           QualType ElemType, int64_t StartIndex,
+                           int64_t ElementCount) {
   constexpr auto DoNotInvalidateSuperRegion =
       RegionAndSymbolInvalidationTraits::InvalidationKinds::
           TK_DoNotInvalidateSuperRegion;
 
-  auto &RegionManager = BufferAsRegion->getMemRegionManager();
+  const LocationContext *LCtx = Call.getLocationContext();
+  const ASTContext &Ctx = State->getStateManager().getContext();
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  auto &RegionManager = Buffer->getMemRegionManager();
+
   SmallVector<SVal> EscapingVals;
-  EscapingVals.reserve(*CountVal);
+  EscapingVals.reserve(ElementCount);
 
   RegionAndSymbolInvalidationTraits ITraits;
-  for (auto Idx : llvm::seq(*StartIndexVal, *StartIndexVal + *CountVal)) {
-    NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
-    const auto *Element = RegionManager.getElementRegion(
-        ElemType, Index, BufferAsRegion, C.getASTContext());
+  for (auto Idx : llvm::seq(StartIndex, StartIndex + ElementCount)) {
+    NonLoc Index = SVB.makeArrayIndex(Idx);
+    const auto *Element =
+        RegionManager.getElementRegion(ElemType, Index, Buffer, Ctx);
     EscapingVals.push_back(loc::MemRegionVal(Element));
     ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
   }
-  return State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
-                                  C.blockCount(), C.getLocationContext(),
-                                  /*CausesPointerEscape=*/false,
-                                  /*InvalidatedSymbols=*/nullptr, &Call,
-                                  &ITraits);
+  return State->invalidateRegions(
+      EscapingVals, Call.getOriginExpr(), BlockCount, LCtx,
+      /*CausesPointerEscape=*/false,
+      /*InvalidatedSymbols=*/nullptr, &Call, &ITraits);
 }
 
 static ProgramStateRef escapeArgs(ProgramStateRef State, CheckerContext &C,
@@ -961,6 +945,73 @@ void StreamChecker::preWrite(const FnDescription *Desc, const CallEvent &Call,
   C.addTransition(State);
 }
 
+static std::optional<QualType> getPointeeType(const MemRegion *R) {
+  if (!R)
+    return std::nullopt;
+  if (const auto *ER = dyn_cast<ElementRegion>(R))
+    return ER->getElementType();
+  if (const auto *TR = dyn_cast<TypedValueRegion>(R))
+    return TR->getValueType();
+  if (const auto *SR = dyn_cast<SymbolicRegion>(R))
+    return SR->getPointeeStaticType();
+  return std::nullopt;
+}
+
+static std::optional<NonLoc> getStartIndex(SValBuilder &SVB,
+                                           const MemRegion *R) {
+  if (!R)
+    return std::nullopt;
+
+  auto Zero = [&SVB] {
+    BasicValueFactory &BVF = SVB.getBasicValueFactory();
+    return nonloc::ConcreteInt(BVF.getIntValue(0, /*isUnsigned=*/false));
+  };
+
+  if (const auto *ER = dyn_cast<ElementRegion>(R))
+    return ER->getIndex();
+  if (const auto *TR = dyn_cast<TypedValueRegion>(R))
+    return Zero();
+  if (const auto *SR = dyn_cast<SymbolicRegion>(R))
+    return Zero();
+  return std::nullopt;
+}
+
+static ProgramStateRef
+tryToInvalidateFReadBufferByElements(ProgramStateRef State, CheckerContext &C,
+                                     const CallEvent &Call, NonLoc SizeVal,
+                                     NonLoc NMembVal) {
+  // Try to invalidate the individual elements.
+  const auto *Buffer =
+      dyn_cast_or_null<SubRegion>(Call.getArgSVal(0).getAsRegion());
+
+  std::optional<QualType> ElemTy = getPointeeType(Buffer);
+  std::optional<SVal> StartElementIndex =
+      getStartIndex(C.getSValBuilder(), Buffer);
+
+  // Drop the outermost ElementRegion to get the buffer.
+  if (const auto *ER = dyn_cast_or_null<ElementRegion>(Buffer))
+    Buffer = dyn_cast<SubRegion>(ER->getSuperRegion());
+
+  std::optional<int64_t> CountVal = getKnownValue(State, NMembVal);
+  std::optional<int64_t> Size = getKnownValue(State, SizeVal);
+  std::optional<int64_t> StartIndexVal =
+      getKnownValue(State, StartElementIndex.value_or(UnknownVal()));
+
+  if (ElemTy && CountVal && Size && StartIndexVal) {
+    int64_t NumBytesRead = Size.value() * CountVal.value();
+    int64_t ElemSizeInChars =
+        C.getASTContext().getTypeSizeInChars(*ElemTy).getQuantity();
+    bool DivisibleAccessSpan = (NumBytesRead % ElemSizeInChars) == 0;
+    int64_t NumElementsRead = NumBytesRead / ElemSizeInChars;
+    constexpr int MaxInvalidatedElementsLimit = 64;
+    if (DivisibleAccessSpan && NumElementsRead <= MaxInvalidatedElementsLimit) {
+      return escapeByStartIndexAndCount(State, Call, C.blockCount(), Buffer,
+                                        *ElemTy, *StartIndexVal, *CountVal);
+    }
+  }
+  return nullptr;
+}
+
 void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
                                     const CallEvent &Call, CheckerContext &C,
                                     bool IsFread) const {
@@ -993,17 +1044,11 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
   // except if EOF was already present.
   if (IsFread && !E.isStreamEof()) {
     // Try to invalidate the individual elements.
-    if (const auto *BufferFirstElem =
-            dyn_cast_or_null<ElementRegion>(Call.getArgSVal(0).getAsRegion())) {
-      const MemRegion *Buffer = BufferFirstElem->getSuperRegion();
-      QualType ElemTy = BufferFirstElem->getElementType();
-      SVal FirstAccessedItem = BufferFirstElem->getIndex();
-      State = escapeByStartIndexAndCount(State, C, Call, Buffer, ElemTy,
-                                         FirstAccessedItem, *NMembVal);
-    } else {
-      // Otherwise just fall back to invalidating the whole buffer.
-      State = escapeArgs(State, C, Call, {0});
-    }
+    // If that is not possible, fall back to invalidating the whole buffer.
+    ProgramStateRef InvalidatedState = tryToInvalidateFReadBufferByElements(
+        State, C, Call, *SizeVal, *NMembVal);
+    State =
+        InvalidatedState ? InvalidatedState : escapeArgs(State, C, Call, {0});
   }
 
   // Generate a transition for the success state.
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index d6e4f23cc353f..6fe929b1cb94a 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -1155,10 +1155,10 @@ MemRegionManager::getCompoundLiteralRegion(const CompoundLiteralExpr *CL,
   return getSubRegion<CompoundLiteralRegion>(CL, sReg);
 }
 
-const ElementRegion*
+const ElementRegion *
 MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx,
-                                   const SubRegion* superRegion,
-                                   ASTContext &Ctx){
+                                   const SubRegion *superRegion,
+                                   const ASTContext &Ctx) {
   QualType T = Ctx.getCanonicalType(elementType).getUnqualifiedType();
 
   llvm::FoldingSetNodeID ID;
diff --git a/clang/test/Analysis/fread.c b/clang/test/Analysis/fread.c
index 1ed27a757bd1e..23d97a1b1dae6 100644
--- a/clang/test/Analysis/fread.c
+++ b/clang/test/Analysis/fread.c
@@ -82,7 +82,7 @@ void read_one_byte3(char *buffer) {
     fread(buffer, 1, 1, fp);
     char p = buffer[1];
     clang_analyzer_isTainted(p); // expected-warning{{NO}}
-    clang_analyzer_dump(buffer[1]); // expected-warning{{derived_}} FIXME This should be 10.
+    clang_analyzer_dump(buffer[1]); // expected-warning{{10 S32b}}
     fclose(fp);
   }
 }
@@ -274,13 +274,9 @@ void compound_write2(void) {
   if (fp) {
     struct S s; // s.a is not touched by fread.
     if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
-      long p = s.a; // FIXME: This should raise an uninitialized read.
-      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
-      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
     } else {
-      long p = s.a; // FIXME: This should raise an uninitialized read.
-      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This should be YES.
-      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      long p = s.a; // expected-warning {{Assigned value is garbage or undefined}}
     }
     fclose(fp);
   }

>From cf90c7b51d62e8f5e46dc387d71dc7c4a9388509 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Mon, 3 Jun 2024 16:07:19 +0200
Subject: [PATCH 8/8] Demonstrate weird fread handling

---
 clang/test/Analysis/fread.c | 82 +++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/clang/test/Analysis/fread.c b/clang/test/Analysis/fread.c
index 23d97a1b1dae6..8647a0442c99c 100644
--- a/clang/test/Analysis/fread.c
+++ b/clang/test/Analysis/fread.c
@@ -1,4 +1,5 @@
 // RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -triple x86_64-linux-gnu  \
 // RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
 // RUN:   -analyzer-checker=debug.ExprInspection
 
@@ -328,3 +329,84 @@ void test_small_read(void) {
     fclose(fp);
   }
 }
+
+void test_partial_elements_read(void) {
+  clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+  int buffer[100];
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // 3*5: 15 bytes read; which is not exactly 4 integers, thus we invalidate the whole buffer.
+    if (5 == fread(buffer + 1, 3, 5, fp)) {
+      clang_analyzer_dump(buffer[0]); // expected-warning{{derived_}}
+    } else {
+      clang_analyzer_dump(buffer[0]); // expected-warning{{derived_}}
+    }
+    fclose(fp);
+  }
+}
+
+void test_whole_elements_read(void) {
+  clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+  int buffer[100];
+  buffer[0] = 1;
+  buffer[15] = 2;
+  buffer[16] = 3;
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // 3*20: 60 bytes read; which is exactly 15 integers. FIXME: 20 elements (the 'count' argument) get invalidated instead of 15.
+    if (20 == fread(buffer + 1, 3, 20, fp)) {
+      clang_analyzer_dump(buffer[0]); // expected-warning{{1 S32b}}
+      clang_analyzer_dump(buffer[20]); // expected-warning{{conj_}}
+      clang_analyzer_dump(buffer[21]); // expected-warning{{1st function call argument is an uninitialized value}}
+    } else {
+      clang_analyzer_dump(buffer[0]); // expected-warning{{1 S32b}}
+      clang_analyzer_dump(buffer[20]); // expected-warning{{conj_}}
+      clang_analyzer_dump(buffer[21]); // expected-warning{{1st function call argument is an uninitialized value}}
+    }
+    fclose(fp);
+  }
+}
+
+void test_unaligned_start_read(void) {
+  clang_analyzer_dump(sizeof(int)); // expected-warning {{4 S32b}}
+
+  int buffer[100];
+  buffer[0] = 3;
+  buffer[1] = 4;
+  buffer[2] = 5;
+  char *asChar = (char*)buffer;
+
+  FILE *fp = fopen("/home/test", "rb+");
+  if (fp) {
+    // We have an 'int' binding at offset 0 of value 3.
+    // We read 4 bytes at byte offset: 1,2,3,4.
+    if (4 == fread(asChar + 1, 1, 4, fp)) {
+      void clang_analyzer_printState(void);
+      clang_analyzer_printState();
+      clang_analyzer_dump(buffer[0]); // expected-warning{{3 S32b}} FIXME Reading a 'char' should not result in a 'S32b' value.
+      clang_analyzer_dump(buffer[1]); // expected-warning{{conj_}}
+      clang_analyzer_dump(buffer[2]); // expected-warning{{5 S32b}}
+
+      clang_analyzer_dump(asChar[0]); // expected-warning{{3 S32b}} FIXME Reading a 'char' should not result in a 'S32b' value.
+      clang_analyzer_dump(asChar[1]); // expected-warning{{conj_}} 1
+      clang_analyzer_dump(asChar[2]); // expected-warning{{conj_}} 2
+      clang_analyzer_dump(asChar[3]); // expected-warning{{conj_}} 3
+      clang_analyzer_dump(asChar[4]); // expected-warning{{conj_}} 4
+      clang_analyzer_dump(asChar[5]); // expected-warning{{1st function call argument is an uninitialized value}}
+    } else {
+      clang_analyzer_dump(buffer[0]); // expected-warning{{3 S32b}} FIXME Reading a 'char' should not result in a 'S32b' value.
+      clang_analyzer_dump(buffer[1]); // expected-warning{{conj_}}
+      clang_analyzer_dump(buffer[2]); // expected-warning{{5 S32b}}
+
+      clang_analyzer_dump(asChar[0]); // expected-warning{{3 S32b}} FIXME Reading a 'char' should not result in a 'S32b' value.
+      clang_analyzer_dump(asChar[1]); // expected-warning{{conj_}} 1
+      clang_analyzer_dump(asChar[2]); // expected-warning{{conj_}} 2
+      clang_analyzer_dump(asChar[3]); // expected-warning{{conj_}} 3
+      clang_analyzer_dump(asChar[4]); // expected-warning{{conj_}} 4
+      clang_analyzer_dump(asChar[5]); // expected-warning{{1st function call argument is an uninitialized value}}
+    }
+    fclose(fp);
+  }
+}



More information about the cfe-commits mailing list