[clang] [analyzer] Enhance array bound checking for `ConstantArrayType` (PR #159357)
Alejandro Álvarez Ayllón via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 18 08:10:17 PDT 2025
================
@@ -555,24 +588,305 @@ std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const {
return std::string(Out.str());
}
-bool StateUpdateReporter::providesInformationAboutInteresting(
- SymbolRef Sym, PathSensitiveBugReport &BR) {
- if (!Sym)
- return false;
- for (SymbolRef PartSym : Sym->symbols()) {
- // The interestingess mark may appear on any layer as we're stripping off
- // the SymIntExpr, UnarySymExpr etc. layers...
- if (BR.isInteresting(PartSym))
- return true;
- // ...but if both sides of the expression are symbolic, then there is no
- // practical algorithm to produce separate constraints for the two
- // operands (from the single combined result).
- if (isa<SymSymExpr>(PartSym))
+// If the base expression of `expr` refers to a `ConstantArrayType`,
+// return the element type and the array size.
+// Note that "real" Flexible Array Members are `IncompleteArrayType`.
+// For them, this function returns `std::nullopt`.
+static std::optional<std::pair<QualType, nonloc::ConcreteInt>>
+getArrayTypeInfo(SValBuilder &svb, ArraySubscriptExpr const *expr) {
+ auto const *arrayBaseExpr = expr->getBase()->IgnoreParenImpCasts();
+ auto const arrayQualType = arrayBaseExpr->getType().getCanonicalType();
+ if (arrayQualType.isNull() || !arrayQualType->isConstantArrayType()) {
+ return std::nullopt;
+ }
+ auto *constArrayType =
+ dyn_cast<ConstantArrayType>(arrayQualType->getAsArrayTypeUnsafe());
+ if (!constArrayType) {
+ return std::nullopt;
+ }
+ return std::make_pair(
+ constArrayType->getElementType(),
+ // Note that an array size is technically unsigned, but
+ // `compareValueToThreshold` (via `getSimplifiedOffsets`)
+ // will do some arithmetics that could overflow and cause FN. For instance
+ // ```
+ // int array[20];
+ // array[unsigned_index + 21];
+ // ```
+ // `unsigned_index + 21 < 20` is turned into `unsigned_index < 20 - 21`,
+ // and if 20 is unsigned, that will overflow to the biggest possible array
+ // which is always trivially true. We obviously do not want that, so we
+ // need to treat the size as signed.
+ svb.makeIntVal(llvm::APSInt{constArrayType->getSize(), false}));
+}
+
+static Messages getNegativeIndexMessage(StringRef Name,
+ nonloc::ConcreteInt ArraySize,
+ NonLoc Index) {
+ auto const ArraySizeVal = ArraySize.getValue()->getZExtValue();
+ std::string const IndexStr = [&]() -> std::string {
+ if (auto ConcreteIndex = getConcreteValue(Index);
+ ConcreteIndex.has_value()) {
+ return formatv(" {0}", ConcreteIndex);
+ }
+ return "";
+ }();
+
+ return {formatv("Out of bound access to {0} at a negative index", Name),
+ formatv("Access of {0} containing {1} elements at negative index{2}",
+ Name, ArraySizeVal, IndexStr)};
+}
+
+static std::string truncateWithEllipsis(StringRef str, size_t maxLength) {
+ if (str.size() <= maxLength)
+ return str.str();
+
+ return (str.substr(0, maxLength - 3) + "...").str();
+}
+
+static Messages getOOBIndexMessage(StringRef Name, NonLoc Index,
+ nonloc::ConcreteInt ArraySize,
+ QualType ElemType,
+ bool AlsoMentionUnderflow) {
+ std::optional<int64_t> IndexN = getConcreteValue(Index);
+ int64_t ExtentN = ArraySize.getValue()->getZExtValue();
+
+ SmallString<256> Buf;
+ llvm::raw_svector_ostream Out(Buf);
+ Out << "Access of " << Name << " at ";
+ if (AlsoMentionUnderflow) {
+ Out << "a negative or overflowing index";
+ } else if (IndexN.has_value()) {
+ Out << "index " << *IndexN;
+ } else {
+ Out << "an overflowing index";
+ }
+
+ const auto ElemTypeStr = truncateWithEllipsis(ElemType.getAsString(), 20);
+
+ Out << ", while it holds only ";
+ if (ExtentN != 1)
+ Out << ExtentN << " '" << ElemTypeStr << "' elements";
+ else
+ Out << "a single " << "'" << ElemTypeStr << "' element";
+
+ return {formatv("Out of bound access to memory {0} {1}",
+ AlsoMentionUnderflow ? "around" : "after the end of", Name),
+ std::string(Buf)};
+}
+
+// "True" flexible array members do not specify any size (`int elems[];`)
+// However, some projects use "fake flexible arrays" (aka "struct hack"), where
+// they specify a size of 0 or 1 to work around a compiler limitation.
+// "True" flexible array members are `IncompleteArrayType` and will be skipped
+// by `performCheckArrayTypeIndex`. We need an heuristic to identify "fake"
+// ones.
----------------
alejandro-alvarez-sonarsource wrote:
There is even `Expr::isFlexibleArrayMemberLike`, I don't know how I overlooked that, sorry.
Removed the re-invented check. Also, the checker can now leverage the flag mentioned by @steakhal.
https://github.com/llvm/llvm-project/pull/159357
More information about the cfe-commits
mailing list