[clang] [analyzer] Enhance array bound checking for `ConstantArrayType` (PR #159357)

Alejandro Álvarez Ayllón via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 18 08:10:17 PDT 2025


================
@@ -555,24 +588,305 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
   return std::string(Out.str());
 }
 
-bool StateUpdateReporter::providesInformationAboutInteresting(
-    SymbolRef Sym, PathSensitiveBugReport &BR) {
-  if (!Sym)
-    return false;
-  for (SymbolRef PartSym : Sym->symbols()) {
-    // The interestingess mark may appear on any layer as we're stripping off
-    // the SymIntExpr, UnarySymExpr etc. layers...
-    if (BR.isInteresting(PartSym))
-      return true;
-    // ...but if both sides of the expression are symbolic, then there is no
-    // practical algorithm to produce separate constraints for the two
-    // operands (from the single combined result).
-    if (isa<SymSymExpr>(PartSym))
+// If the base expression of `expr` refers to a `ConstantArrayType`,
+// return the element type and the array size.
+// Note that "real" Flexible Array Members are `IncompleteArrayType`.
+// For them, this function returns `std::nullopt`.
+static std::optional<std::pair<QualType, nonloc::ConcreteInt>>
+getArrayTypeInfo(SValBuilder &svb, ArraySubscriptExpr const *expr) {
+  auto const *arrayBaseExpr = expr->getBase()->IgnoreParenImpCasts();
+  auto const arrayQualType = arrayBaseExpr->getType().getCanonicalType();
+  if (arrayQualType.isNull() || !arrayQualType->isConstantArrayType()) {
+    return std::nullopt;
+  }
+  auto *constArrayType =
+      dyn_cast<ConstantArrayType>(arrayQualType->getAsArrayTypeUnsafe());
+  if (!constArrayType) {
+    return std::nullopt;
+  }
+  return std::make_pair(
+      constArrayType->getElementType(),
+      // Note that an array size is technically unsigned, but
+      // `compareValueToThreshold` (via `getSimplifiedOffsets`)
+      // will do some arithmetics that could overflow and cause FN. For instance
+      // ```
+      // int array[20];
+      // array[unsigned_index + 21];
+      // ```
+      // `unsigned_index + 21 < 20` is turned into `unsigned_index < 20 - 21`,
+      // and if 20 is unsigned, that will overflow to the biggest possible array
+      // which is always trivially true. We obviously do not want that, so we
+      // need to treat the size as signed.
+      svb.makeIntVal(llvm::APSInt{constArrayType->getSize(), false}));
+}
+
+static Messages getNegativeIndexMessage(StringRef Name,
+                                        nonloc::ConcreteInt ArraySize,
+                                        NonLoc Index) {
+  auto const ArraySizeVal = ArraySize.getValue()->getZExtValue();
+  std::string const IndexStr = [&]() -> std::string {
+    if (auto ConcreteIndex = getConcreteValue(Index);
+        ConcreteIndex.has_value()) {
+      return formatv(" {0}", ConcreteIndex);
+    }
+    return "";
+  }();
+
+  return {formatv("Out of bound access to {0} at a negative index", Name),
+          formatv("Access of {0} containing {1} elements at negative index{2}",
+                  Name, ArraySizeVal, IndexStr)};
+}
+
+static std::string truncateWithEllipsis(StringRef str, size_t maxLength) {
+  if (str.size() <= maxLength)
+    return str.str();
+
+  return (str.substr(0, maxLength - 3) + "...").str();
+}
+
+static Messages getOOBIndexMessage(StringRef Name, NonLoc Index,
+                                   nonloc::ConcreteInt ArraySize,
+                                   QualType ElemType,
+                                   bool AlsoMentionUnderflow) {
+  std::optional<int64_t> IndexN = getConcreteValue(Index);
+  int64_t ExtentN = ArraySize.getValue()->getZExtValue();
+
+  SmallString<256> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Access of " << Name << " at ";
+  if (AlsoMentionUnderflow) {
+    Out << "a negative or overflowing index";
+  } else if (IndexN.has_value()) {
+    Out << "index " << *IndexN;
+  } else {
+    Out << "an overflowing index";
+  }
+
+  const auto ElemTypeStr = truncateWithEllipsis(ElemType.getAsString(), 20);
+
+  Out << ", while it holds only ";
+  if (ExtentN != 1)
+    Out << ExtentN << " '" << ElemTypeStr << "' elements";
+  else
+    Out << "a single " << "'" << ElemTypeStr << "' element";
+
+  return {formatv("Out of bound access to memory {0} {1}",
+                  AlsoMentionUnderflow ? "around" : "after the end of", Name),
+          std::string(Buf)};
+}
+
+// "True" flexible array members do not specify any size (`int elems[];`)
+// However, some projects use "fake flexible arrays" (aka "struct hack"), where
+// they specify a size of 0 or 1 to work around a compiler limitation.
+// "True" flexible array members are `IncompleteArrayType` and will be skipped
+// by `performCheckArrayTypeIndex`. We need an heuristic to identify "fake"
+// ones.
----------------
alejandro-alvarez-sonarsource wrote:

There is even `Expr::isFlexibleArrayMemberLike`, I don't know how I overlooked that, sorry.
Removed the re-invented check. Also, the checker can now leverage the flag mentioned by @steakhal.

https://github.com/llvm/llvm-project/pull/159357


More information about the cfe-commits mailing list