[clang] [analyzer] Check the correct first and last elements in cstring.UninitializedRead (PR #95408)

Kristóf Umann via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 1 04:25:58 PDT 2024


https://github.com/Szelethus updated https://github.com/llvm/llvm-project/pull/95408

>From 90cedd519da8b76c686db9c3f824b6d044e16eb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Thu, 25 Apr 2024 17:31:24 +0200
Subject: [PATCH 1/6] [analyzer] Check the correct first and last elements in
 cstring.UninitializedRead

I intend to fix this checker up so that we can move it out of alpha. I
made a bunch of analyses, and found many similar false positives:

```c++
int t[] = {1,2,3};
memcpy(dst, t, sizeof(t) / sizeof(t[0])); // warn
```

The problem here is the way CStringChecker checks whether the
destination and source buffers are initialized: heuristically, it only
checks the first and last element. This is fine, however, it retrieves
these elements as characters, even if the underlaying object is not a
character array. Reading the last byte of an integer is undefined, so
the checker emits a bug here.

A quick search tells you the rationale: "Both objects are reinterpreted
as arrays of unsigned char.". But the static analyzer right now can't
check byte-by-byte if a memory region is _initialized_, it can only
check if its a well-defined character or not.

In this patch, I pry the original array out of the arguments to memcpy
(and similar functions), and retrieve the actual first and last elements
according to the array's actual element type.

Currently, my improvements reduced the number of reports to 29 on these
projects: memcached,tmux,curl,twin,vim,openssl,sqlite,ffmpeg,postgres

https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_upper_bound_patched&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100

Before my patch, there were 87.

https://codechecker-demo.eastus.cloudapp.azure.com/Default/reports?detection-status=New&detection-status=Reopened&detection-status=Unresolved&is-unique=on&run=%2acstring_uninit_baseline&newcheck=%2acstring_uninit_upper_bounds_patched&diff-type=New&checker-name=alpha.unix.cstring.UninitializedRead&items-per-page=100

Change-Id: I0ef16beed735201af857abb6ed02de764af2e78a
---
 .../Core/PathSensitive/MemRegion.h            |  14 ++
 .../Checkers/CStringChecker.cpp               | 226 +++++++++++++++---
 clang/test/Analysis/bstring_UninitRead.c      |  66 ++++-
 3 files changed, 258 insertions(+), 48 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index c0d3fbd0eb961a..9e9f2f4fc1a0e5 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -34,6 +34,7 @@
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <cstdint>
 #include <limits>
@@ -99,6 +100,8 @@ class MemRegion : public llvm::FoldingSetNode {
 #define REGION(Id, Parent) Id ## Kind,
 #define REGION_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
 #include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+#undef REGION_RANGE
   };
 
 private:
@@ -171,6 +174,17 @@ class MemRegion : public llvm::FoldingSetNode {
 
   Kind getKind() const { return kind; }
 
+  StringRef getKindStr() const {
+    switch (getKind()) {
+#define REGION(Id, Parent)                                                     \
+  case Id##Kind:                                                               \
+    return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+    }
+    llvm_unreachable("Unkown kind!");
+  }
+
   template<typename RegionTy> const RegionTy* getAs() const;
   template <typename RegionTy>
   LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy *castAs() const;
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 238e87a712a43a..fdea2e01215b44 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InterCheckerAPI.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
@@ -22,10 +23,13 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/raw_ostream.h"
 #include <functional>
 #include <optional>
@@ -304,6 +308,10 @@ class CStringChecker : public Checker< eval::Call,
   // Re-usable checks
   ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
                                AnyArgExpr Arg, SVal l) const;
+  // Check whether the origin region behind \p Element (like the actual array
+  // region \p Element is from) is initialized.
+  ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
+                            AnyArgExpr Buffer, SVal Element, SVal Size) const;
   ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
                                 AnyArgExpr Buffer, SVal Element,
                                 AccessKind Access,
@@ -329,7 +337,7 @@ class CStringChecker : public Checker< eval::Call,
                          const Stmt *S, StringRef WarningMsg) const;
   void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
   void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
-                             const Expr *E) const;
+                                const Expr *E, StringRef Msg) const;
   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
                                             ProgramStateRef state,
                                             NonLoc left,
@@ -393,6 +401,173 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
   return stateNonNull;
 }
 
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+                                      const ElementRegion *ER, CharKind CK) {
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  if (CK == CharKind::Regular) {
+    if (ER->getValueType() != Ctx.CharTy)
+      return {};
+    return ER->getIndex();
+  }
+
+  if (ER->getValueType() != Ctx.WideCharTy)
+    return {};
+
+  QualType SizeTy = Ctx.getSizeType();
+  NonLoc WideSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                      SizeTy)
+          .castAs<NonLoc>();
+  SVal Offset =
+      SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+  if (Offset.isUnknown())
+    return {};
+  return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin regin (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+  const MemRegion *MR = ER->getSuperRegion();
+  const MemRegion *Ret = MR;
+  assert(MR);
+  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+    SymbolRef sym2 = sym->getSymbol();
+    if (!sym2)
+      return nullptr;
+    Ret = sym2->getOriginRegion();
+  }
+  if (const auto *element = MR->getAs<ElementRegion>()) {
+    Ret = element->getBaseRegion();
+  }
+  return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
+
+// Basically 1 -> 1st, 12 -> 12th, etc.
+static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
+  Os << Idx << llvm::getOrdinalSuffix(Idx);
+}
+
+ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
+                                          ProgramStateRef State,
+                                          AnyArgExpr Buffer, SVal Element,
+                                          SVal Size) const {
+
+  // If a previous check has failed, propagate the failure.
+  if (!State)
+    return nullptr;
+
+  const MemRegion *R = Element.getAsRegion();
+  if (!R)
+    return State;
+
+  const auto *ER = dyn_cast<ElementRegion>(R);
+  if (!ER)
+    return State;
+
+  const TypedValueRegion *Orig = getOriginRegion(ER);
+  if (!Orig)
+    return State;
+
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  // FIXME: We ought to able to check objects as well. Maybe
+  // UninitializedObjectChecker could help?
+  if (!Orig->getValueType()->isArrayType())
+    return State;
+
+  const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+  const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+
+  SVal FirstElementVal =
+      State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+  if (!isa<Loc>(FirstElementVal))
+    return State;
+
+  // Ensure that we wouldn't read uninitialized value.
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The first element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument is undefined";
+    emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+    return nullptr;
+  }
+
+  // We won't check whether the entire region is fully initialized -- lets just
+  // check that the first and the last element is. So, onto checking the last
+  // element:
+  const QualType IdxTy = SValBuilder.getArrayIndexType();
+
+  NonLoc ElemSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+          .castAs<NonLoc>();
+
+  // FIXME: Check that the size arg to the cstring function is divisible by
+  // size of the actual element type?
+
+  // The type of the argument to the cstring function is either char or wchar,
+  // but thats not the type of the original array (or memory region).
+  // Suppose the following:
+  //   int t[5];
+  //   memcpy(dst, t, sizeof(t) / sizeof(t[0]));
+  // When checking whether t is fully initialized, we see it as char array of
+  // size sizeof(int)*5. If we check the last element as a character, we read
+  // the last byte of an integer, which will be undefined. But just because
+  // that value is undefined, it doesn't mean that the element is uninitialized!
+  // For this reason, we need to retrieve the actual last element with the
+  // correct type.
+
+  // Divide the size argument to the cstring function by the actual element
+  // type. This value will be size of the array, or the index to the
+  // past-the-end element.
+  std::optional<NonLoc> Offset =
+      SValBuilder
+          .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+                       IdxTy)
+          .getAs<NonLoc>();
+
+  // Retrieve the index of the last element.
+  const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
+  SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+
+  if (!Offset)
+    return State;
+
+  SVal LastElementVal =
+      State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+  if (!isa<Loc>(LastElementVal))
+    return State;
+
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
+    const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
+    // If we can't get emit a sensible last element index, just bail out --
+    // prefer to emit nothing in favour of emitting garbage quality reports.
+    if (!IdxInt) {
+      C.addSink();
+      return nullptr;
+    }
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The last element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument to access (the ";
+    printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
+    OS << ") is undefined";
+    emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+    return nullptr;
+  }
+  return State;
+}
+
 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
                                               ProgramStateRef state,
@@ -413,30 +588,10 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
   if (!ER)
     return state;
 
-  SValBuilder &svalBuilder = C.getSValBuilder();
-  ASTContext &Ctx = svalBuilder.getContext();
-
   // Get the index of the accessed element.
-  NonLoc Idx = ER->getIndex();
-
-  if (CK == CharKind::Regular) {
-    if (ER->getValueType() != Ctx.CharTy)
-      return state;
-  } else {
-    if (ER->getValueType() != Ctx.WideCharTy)
-      return state;
-
-    QualType SizeTy = Ctx.getSizeType();
-    NonLoc WideSize =
-        svalBuilder
-            .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
-                        SizeTy)
-            .castAs<NonLoc>();
-    SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
-    if (Offset.isUnknown())
-      return state;
-    Idx = Offset.castAs<NonLoc>();
-  }
+  std::optional<NonLoc> Idx = getIndex(state, ER, CK);
+  if (!Idx)
+    return state;
 
   // Get the size of the array.
   const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
@@ -444,7 +599,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
       getDynamicExtent(state, superReg, C.getSValBuilder());
 
   ProgramStateRef StInBound, StOutBound;
-  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
+  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
   if (StOutBound && !StInBound) {
     // These checks are either enabled by the CString out-of-bounds checker
     // explicitly or implicitly by the Malloc checker.
@@ -459,15 +614,6 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
     return nullptr;
   }
 
-  // Ensure that we wouldn't read uninitialized value.
-  if (Access == AccessKind::read) {
-    if (Filter.CheckCStringUninitializedRead &&
-        StInBound->getSVal(ER).isUndef()) {
-      emitUninitializedReadBug(C, StInBound, Buffer.Expression);
-      return nullptr;
-    }
-  }
-
   // Array bound check succeeded.  From this point forward the array bound
   // should always succeed.
   return StInBound;
@@ -502,6 +648,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
 
   // Check if the first byte of the buffer is accessible.
   State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
+
   if (!State)
     return nullptr;
 
@@ -526,6 +673,8 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
     SVal BufEnd =
         svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
     State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
+    if (Access == AccessKind::read)
+      State = checkInit(C, State, Buffer, BufEnd, *Length);
 
     // If the buffer isn't large enough, abort.
     if (!State)
@@ -694,16 +843,17 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
 
 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
                                               ProgramStateRef State,
-                                              const Expr *E) const {
+                                              const Expr *E,
+                                              StringRef Msg) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    const char *Msg =
-        "Bytes string function accesses uninitialized/garbage values";
     if (!BT_UninitRead)
       BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
                                       "Accessing unitialized/garbage values"));
 
     auto Report =
         std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
+    Report->addNote("Other elements might also be undefined",
+                    Report->getLocation());
     Report->addRange(E->getSourceRange());
     bugreporter::trackExpressionValue(N, E, *Report);
     C.emitReport(std::move(Report));
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index c535e018e62c27..97625a36506633 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -16,7 +16,31 @@ void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
 
 void top(char *dst) {
   char buf[10];
-  memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
+  memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+void top2(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+void top3(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 1);
+  (void)buf;
+}
+
+void top4(char *dst) {
+  char buf[10];
+  buf[0] = 'i';
+  memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
+                        // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
@@ -26,16 +50,12 @@ void top(char *dst) {
 
 void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void mempcpy14() {
+void mempcpy13() {
   int src[] = {1, 2, 3, 4};
   int dst[5] = {0};
   int *p;
 
-  p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
-   // FIXME: This behaviour is actually surprising and needs to be fixed, 
-   // mempcpy seems to consider the very last byte of the src buffer uninitialized
-   // and returning undef unfortunately. It should have returned unknown or a conjured value instead.
-
+  p = mempcpy(dst, src, 4 * sizeof(int));
   clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
 }
 
@@ -52,8 +72,34 @@ void mempcpy15() {
   struct st *p2;
 
   p1 = (&s2) + 1;
-  p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string function accesses uninitialized/garbage values}}
-  // FIXME: It seems same as mempcpy14() case.
-  
+
+  p2 = mempcpy(&s2, &s1, sizeof(struct st));
+
   clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
 }
+
+void mempcpy16() {
+  struct st s1;
+  struct st s2;
+
+  // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+  // initialized?
+  mempcpy(&s2, &s1, sizeof(struct st));
+}
+
+void initialized(int *dest) {
+  int t[] = {1, 2, 3};
+  memcpy(dest, t, sizeof(t));
+}
+
+// Creduced crash.
+
+void *ga_copy_strings_from_0;
+void *memmove();
+void alloc();
+void ga_copy_strings() {
+  int i = 0;
+  for (;; ++i)
+    memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1);
+}
+

>From 70dfed2cb0a6b871cb5b60b19740d411db9bd8ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Tue, 18 Jun 2024 13:30:03 +0200
Subject: [PATCH 2/6] Partial revisions according to reviewer comments

---
 .../Core/PathSensitive/MemRegion.h            | 11 +---
 .../Checkers/CStringChecker.cpp               | 52 ++++++++-----------
 clang/lib/StaticAnalyzer/Core/MemRegion.cpp   | 11 ++++
 clang/test/Analysis/bstring_UninitRead.c      | 52 ++++++++++---------
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
index 9e9f2f4fc1a0e5..0d9566285f5d4e 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h
@@ -174,16 +174,7 @@ class MemRegion : public llvm::FoldingSetNode {
 
   Kind getKind() const { return kind; }
 
-  StringRef getKindStr() const {
-    switch (getKind()) {
-#define REGION(Id, Parent)                                                     \
-  case Id##Kind:                                                               \
-    return #Id;
-#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
-#undef REGION
-    }
-    llvm_unreachable("Unkown kind!");
-  }
+  StringRef getKindStr() const;
 
   template<typename RegionTy> const RegionTy* getAs() const;
   template <typename RegionTy>
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index fdea2e01215b44..eaa999ff70c5f3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -360,15 +360,15 @@ REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
 //===----------------------------------------------------------------------===//
 
 std::pair<ProgramStateRef , ProgramStateRef >
-CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
+CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
                            QualType Ty) {
   std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
   if (!val)
-    return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
+    return std::pair<ProgramStateRef , ProgramStateRef >(State, State);
 
   SValBuilder &svalBuilder = C.getSValBuilder();
   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
-  return state->assume(svalBuilder.evalEQ(state, *val, zero));
+  return State->assume(svalBuilder.evalEQ(State, *val, zero));
 }
 
 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
@@ -403,8 +403,8 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
 
 static std::optional<NonLoc> getIndex(ProgramStateRef State,
                                       const ElementRegion *ER, CharKind CK) {
-  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
-  ASTContext &Ctx = SValBuilder.getContext();
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SVB.getContext();
 
   if (CK == CharKind::Regular) {
     if (ER->getValueType() != Ctx.CharTy)
@@ -417,18 +417,17 @@ static std::optional<NonLoc> getIndex(ProgramStateRef State,
 
   QualType SizeTy = Ctx.getSizeType();
   NonLoc WideSize =
-      SValBuilder
-          .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
-                      SizeTy)
+      SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                     SizeTy)
           .castAs<NonLoc>();
   SVal Offset =
-      SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+      SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
   if (Offset.isUnknown())
     return {};
   return Offset.castAs<NonLoc>();
 }
 
-// Try to get hold of the origin regin (e.g. the actual array region from an
+// Try to get hold of the origin region (e.g. the actual array region from an
 // element region).
 static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
   const MemRegion *MR = ER->getSuperRegion();
@@ -440,8 +439,8 @@ static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
       return nullptr;
     Ret = sym2->getOriginRegion();
   }
-  if (const auto *element = MR->getAs<ElementRegion>()) {
-    Ret = element->getBaseRegion();
+  if (const auto *Elem = MR->getAs<ElementRegion>()) {
+    Ret = Elem->getBaseRegion();
   }
   return dyn_cast_or_null<TypedValueRegion>(Ret);
 }
@@ -472,21 +471,19 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!Orig)
     return State;
 
-  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
-  ASTContext &Ctx = SValBuilder.getContext();
-
   // FIXME: We ought to able to check objects as well. Maybe
   // UninitializedObjectChecker could help?
   if (!Orig->getValueType()->isArrayType())
     return State;
 
+  SValBuilder &SVB = C.getSValBuilder();
+  ASTContext &Ctx = SVB.getContext();
+
   const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
-  const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+  const NonLoc Zero = SVB.makeZeroArrayIndex();
 
-  SVal FirstElementVal =
+  Loc FirstElementVal =
       State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
-  if (!isa<Loc>(FirstElementVal))
-    return State;
 
   // Ensure that we wouldn't read uninitialized value.
   if (Filter.CheckCStringUninitializedRead &&
@@ -503,11 +500,10 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   // We won't check whether the entire region is fully initialized -- lets just
   // check that the first and the last element is. So, onto checking the last
   // element:
-  const QualType IdxTy = SValBuilder.getArrayIndexType();
+  const QualType IdxTy = SVB.getArrayIndexType();
 
   NonLoc ElemSize =
-      SValBuilder
-          .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+      SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
           .castAs<NonLoc>();
 
   // FIXME: Check that the size arg to the cstring function is divisible by
@@ -529,14 +525,13 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   // type. This value will be size of the array, or the index to the
   // past-the-end element.
   std::optional<NonLoc> Offset =
-      SValBuilder
-          .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
-                       IdxTy)
+      SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+                      IdxTy)
           .getAs<NonLoc>();
 
   // Retrieve the index of the last element.
-  const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
-  SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+  const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
+  SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
 
   if (!Offset)
     return State;
@@ -598,8 +593,7 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
   DefinedOrUnknownSVal Size =
       getDynamicExtent(state, superReg, C.getSValBuilder());
 
-  ProgramStateRef StInBound, StOutBound;
-  std::tie(StInBound, StOutBound) = state->assumeInBoundDual(*Idx, Size);
+  auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
   if (StOutBound && !StInBound) {
     // These checks are either enabled by the CString out-of-bounds checker
     // explicitly or implicitly by the Malloc checker.
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index 6fe929b1cb94a7..693791c3aee8b9 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -630,6 +630,17 @@ bool MemRegion::canPrintPrettyAsExpr() const {
   return false;
 }
 
+StringRef MemRegion::getKindStr() const {
+  switch (getKind()) {
+#define REGION(Id, Parent)                                                     \
+  case Id##Kind:                                                               \
+    return #Id;
+#include "clang/StaticAnalyzer/Core/PathSensitive/Regions.def"
+#undef REGION
+  }
+  llvm_unreachable("Unkown kind!");
+}
+
 void MemRegion::printPretty(raw_ostream &os) const {
   assert(canPrintPretty() && "This region cannot be printed pretty.");
   os << "'";
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index 97625a36506633..945975bfaaf69a 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -1,12 +1,11 @@
 // RUN: %clang_analyze_cc1 -verify %s \
 // RUN: -analyzer-checker=core,alpha.unix.cstring
 
-
-// This file is generally for the alpha.unix.cstring.UninitializedRead Checker, the reason for putting it into
-// the separate file because the checker is break the some existing test cases in bstring.c file , so we don't 
-// wanna mess up with some existing test case so it's better to create separate file for it, this file also include 
-// the broken test for the reference in future about the broken tests.
-
+//===----------------------------------------------------------------------===//
+// mempcpy() using character array. This is the easiest case, as memcpy
+// intepretrs the dst and src buffers as character arrays (regardless of their
+// actual type).
+//===----------------------------------------------------------------------===//
 
 typedef typeof(sizeof(int)) size_t;
 
@@ -14,14 +13,14 @@ void clang_analyzer_eval(int);
 
 void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void top(char *dst) {
+void memcpy_array_fully_uninit(char *dst) {
   char buf[10];
   memcpy(dst, buf, 10); // expected-warning{{The first element of the 2nd argument is undefined}}
                         // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
-void top2(char *dst) {
+void memcpy_array_partially_uninit(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
@@ -29,14 +28,14 @@ void top2(char *dst) {
   (void)buf;
 }
 
-void top3(char *dst) {
+void memcpy_array_only_init_portion(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 1);
   (void)buf;
 }
 
-void top4(char *dst) {
+void memcpy_array_partially_init_error(char *dst) {
   char buf[10];
   buf[0] = 'i';
   memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
@@ -44,28 +43,36 @@ void top4(char *dst) {
   (void)buf;
 }
 
-//===----------------------------------------------------------------------===
-// mempcpy()
-//===----------------------------------------------------------------------===
+//===----------------------------------------------------------------------===//
+// mempcpy() using non-character arrays.
+//===----------------------------------------------------------------------===//
 
 void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
 
-void mempcpy13() {
+void memcpy_int_array_fully_init() {
   int src[] = {1, 2, 3, 4};
   int dst[5] = {0};
   int *p;
 
   p = mempcpy(dst, src, 4 * sizeof(int));
-  clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
+  clang_analyzer_eval(p == &dst[4]);
 }
 
+void memcpy_int_array_fully_init2(int *dest) {
+  int t[] = {1, 2, 3};
+  memcpy(dest, t, sizeof(t));
+}
+
+//===----------------------------------------------------------------------===//
+// mempcpy() using nonarrays.
+//===----------------------------------------------------------------------===//
+
 struct st {
   int i;
   int j;
 };
 
-
-void mempcpy15() {
+void mempcpy_struct_partially_uninit() {
   struct st s1 = {0};
   struct st s2;
   struct st *p1;
@@ -73,12 +80,14 @@ void mempcpy15() {
 
   p1 = (&s2) + 1;
 
+  // FIXME: Maybe ask UninitializedObjectChecker whether s1 is fully
+  // initialized?
   p2 = mempcpy(&s2, &s1, sizeof(struct st));
 
-  clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
+  clang_analyzer_eval(p1 == p2);
 }
 
-void mempcpy16() {
+void mempcpy_struct_fully_uninit() {
   struct st s1;
   struct st s2;
 
@@ -87,11 +96,6 @@ void mempcpy16() {
   mempcpy(&s2, &s1, sizeof(struct st));
 }
 
-void initialized(int *dest) {
-  int t[] = {1, 2, 3};
-  memcpy(dest, t, sizeof(t));
-}
-
 // Creduced crash.
 
 void *ga_copy_strings_from_0;

>From 8bf7c44fa423c0376e1a0b4afbda02daaebed83c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Tue, 18 Jun 2024 17:06:18 +0200
Subject: [PATCH 3/6] Address the remaining comments

---
 .../Checkers/CStringChecker.cpp               | 16 ++++--------
 clang/test/Analysis/bstring_UninitRead.c      | 25 +++++++++++++++----
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index eaa999ff70c5f3..bc31c268fca9dc 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -439,9 +439,6 @@ static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
       return nullptr;
     Ret = sym2->getOriginRegion();
   }
-  if (const auto *Elem = MR->getAs<ElementRegion>()) {
-    Ret = Elem->getBaseRegion();
-  }
   return dyn_cast_or_null<TypedValueRegion>(Ret);
 }
 
@@ -460,10 +457,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     return nullptr;
 
   const MemRegion *R = Element.getAsRegion();
-  if (!R)
-    return State;
-
-  const auto *ER = dyn_cast<ElementRegion>(R);
+  const auto *ER = dyn_cast_or_null<ElementRegion>(R);
   if (!ER)
     return State;
 
@@ -552,11 +546,11 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     }
     llvm::SmallString<258> Buf;
     llvm::raw_svector_ostream OS(Buf);
-    OS << "The last element of the ";
-    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
-    OS << " argument to access (the ";
+    OS << "The last (";
     printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
-    OS << ") is undefined";
+    OS << ") element to access in the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument is undefined";
     emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
     return nullptr;
   }
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index 945975bfaaf69a..7937146d6364e4 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -23,7 +23,7 @@ void memcpy_array_fully_uninit(char *dst) {
 void memcpy_array_partially_uninit(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 10); // expected-warning{{The last element of the 2nd argument to access (the 10th) is undefined}}
+  memcpy(dst, buf, 10); // expected-warning{{The last (10th) element to access in the 2nd argument is undefined}}
                         // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
@@ -38,8 +38,23 @@ void memcpy_array_only_init_portion(char *dst) {
 void memcpy_array_partially_init_error(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 2); // expected-warning{{The last element of the 2nd argument to access (the 2nd) is undefined}}
-                        // expected-note at -1{{Other elements might also be undefined}}
+  memcpy(dst, buf, 2); // expected-warning{{The last (2nd) element to access in the 2nd argument is undefined}}
+                      // expected-note at -1{{Other elements might also be undefined}}
+  (void)buf;
+}
+
+// The interesting case here is that the portion we're copying is initialized,
+// but not the whole matrix. We need to be careful to extract buf[1], and not
+// buf when trying to peel region layers off from the source argument.
+void memcpy_array_from_matrix(char *dst) {
+  char buf[2][2];
+  buf[1][0] = 'i';
+  buf[1][1] = 'j';
+  // FIXME: This is a FP -- we mistakenly retrieve the first element of buf,
+  // instead of the first element of buf[1]. getLValueElement simply peels off
+  // another ElementRegion layer, when in this case it really shouldn't.
+  memcpy(dst, buf[1], 2); // expected-warning{{The first element of the 2nd argument is undefined}}
+                          // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
 
@@ -96,8 +111,8 @@ void mempcpy_struct_fully_uninit() {
   mempcpy(&s2, &s1, sizeof(struct st));
 }
 
-// Creduced crash.
-
+// Creduced crash. In this case, an symbolicregion is wrapped in an
+// elementregion for the src argument.
 void *ga_copy_strings_from_0;
 void *memmove();
 void alloc();

>From 42957c5e85bba8be5b37d8ca0b31b9466280367c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Thu, 20 Jun 2024 10:29:46 +0200
Subject: [PATCH 4/6] Change getOriginRegion to getSuperRegion

---
 .../StaticAnalyzer/Checkers/CStringChecker.cpp  | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index bc31c268fca9dc..f7e04de269dd52 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -427,21 +427,6 @@ static std::optional<NonLoc> getIndex(ProgramStateRef State,
   return Offset.castAs<NonLoc>();
 }
 
-// Try to get hold of the origin region (e.g. the actual array region from an
-// element region).
-static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
-  const MemRegion *MR = ER->getSuperRegion();
-  const MemRegion *Ret = MR;
-  assert(MR);
-  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
-    SymbolRef sym2 = sym->getSymbol();
-    if (!sym2)
-      return nullptr;
-    Ret = sym2->getOriginRegion();
-  }
-  return dyn_cast_or_null<TypedValueRegion>(Ret);
-}
-
 // Basically 1 -> 1st, 12 -> 12th, etc.
 static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
   Os << Idx << llvm::getOrdinalSuffix(Idx);
@@ -461,7 +446,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!ER)
     return State;
 
-  const TypedValueRegion *Orig = getOriginRegion(ER);
+  const auto *Orig = ER->getSuperRegion()->getAs<TypedValueRegion>();
   if (!Orig)
     return State;
 

>From 59dadadd95cb58604d95ee28b49314cd8741f476 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Mon, 24 Jun 2024 13:29:49 +0200
Subject: [PATCH 5/6] Further refine the warning message

---
 .../StaticAnalyzer/Checkers/CStringChecker.cpp | 18 +++++++++---------
 clang/test/Analysis/bstring_UninitRead.c       |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index f7e04de269dd52..d913d2ce3290a3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -446,23 +446,23 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!ER)
     return State;
 
-  const auto *Orig = ER->getSuperRegion()->getAs<TypedValueRegion>();
-  if (!Orig)
+  const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
+  if (!SuperR)
     return State;
 
   // FIXME: We ought to able to check objects as well. Maybe
   // UninitializedObjectChecker could help?
-  if (!Orig->getValueType()->isArrayType())
+  if (!SuperR->getValueType()->isArrayType())
     return State;
 
   SValBuilder &SVB = C.getSValBuilder();
   ASTContext &Ctx = SVB.getContext();
 
-  const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+  const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
   const NonLoc Zero = SVB.makeZeroArrayIndex();
 
   Loc FirstElementVal =
-      State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+      State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).castAs<Loc>();
 
   // Ensure that we wouldn't read uninitialized value.
   if (Filter.CheckCStringUninitializedRead &&
@@ -516,7 +516,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     return State;
 
   SVal LastElementVal =
-      State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+      State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
   if (!isa<Loc>(LastElementVal))
     return State;
 
@@ -531,9 +531,9 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     }
     llvm::SmallString<258> Buf;
     llvm::raw_svector_ostream OS(Buf);
-    OS << "The last (";
-    printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
-    OS << ") element to access in the ";
+    OS << "The last accessed element (at index ";
+    OS << IdxInt->getExtValue();
+    OS << ") in the ";
     printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
     OS << " argument is undefined";
     emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index 7937146d6364e4..d7ac8ce94d3fe6 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -23,7 +23,7 @@ void memcpy_array_fully_uninit(char *dst) {
 void memcpy_array_partially_uninit(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 10); // expected-warning{{The last (10th) element to access in the 2nd argument is undefined}}
+  memcpy(dst, buf, 10); // expected-warning{{The last accessed element (at index 9) in the 2nd argument is undefined}}
                         // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }
@@ -38,7 +38,7 @@ void memcpy_array_only_init_portion(char *dst) {
 void memcpy_array_partially_init_error(char *dst) {
   char buf[10];
   buf[0] = 'i';
-  memcpy(dst, buf, 2); // expected-warning{{The last (2nd) element to access in the 2nd argument is undefined}}
+  memcpy(dst, buf, 2); // expected-warning{{The last accessed element (at index 1) in the 2nd argument is undefined}}
                       // expected-note at -1{{Other elements might also be undefined}}
   (void)buf;
 }

>From d37f0cb4ceb0f99947be4ee65564b257dc095e6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Mon, 1 Jul 2024 13:23:28 +0200
Subject: [PATCH 6/6] Fix a crash

---
 clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 8 +++++---
 clang/test/Analysis/bstring_UninitRead.c             | 8 ++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index d913d2ce3290a3..9fe53a0bcb2b94 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -461,12 +461,14 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
   const NonLoc Zero = SVB.makeZeroArrayIndex();
 
-  Loc FirstElementVal =
-      State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).castAs<Loc>();
+  std::optional<Loc> FirstElementVal =
+      State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
+  if (!FirstElementVal)
+    return State;
 
   // Ensure that we wouldn't read uninitialized value.
   if (Filter.CheckCStringUninitializedRead &&
-      State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+      State->getSVal(*FirstElementVal).isUndef()) {
     llvm::SmallString<258> Buf;
     llvm::raw_svector_ostream OS(Buf);
     OS << "The first element of the ";
diff --git a/clang/test/Analysis/bstring_UninitRead.c b/clang/test/Analysis/bstring_UninitRead.c
index d7ac8ce94d3fe6..45e38dd3162988 100644
--- a/clang/test/Analysis/bstring_UninitRead.c
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -122,3 +122,11 @@ void ga_copy_strings() {
     memmove(alloc, ((char **)ga_copy_strings_from_0)[i], 1);
 }
 
+// Creduced crash. In this case, retrieving the Loc for the first element failed.
+char mov_mdhd_language_map[][4] = {};
+int ff_mov_lang_to_iso639_code;
+char *ff_mov_lang_to_iso639_to;
+void ff_mov_lang_to_iso639() {
+  memcpy(ff_mov_lang_to_iso639_to,
+         mov_mdhd_language_map[ff_mov_lang_to_iso639_code], 4);
+}



More information about the cfe-commits mailing list