[clang] [llvm] [NFC][analyzer] Extract bounds checking library (PR #202372)
Donát Nagy via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 29 02:08:16 PDT 2026
================
@@ -0,0 +1,211 @@
+//===- BoundsChecking.h - Bounds checking related APIs ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines APIs for performing a bounds check (i.e. comparing a
+// symbolic Offset value to zero and a symbolic Extent value) and composing
+// descriptions that explain its results.
+//
+// This is intended as a replacement for `ProgramState::assumeInBound` to
+// avoid its incorrect logic and compensate for deficiencies of other parts of
+// the analyzer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_BOUNDSCHECKING_H
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <optional>
+
+namespace clang {
+namespace ento {
+
+/// If `E` is an array subscript expression with a base that is "clean" (= not
+/// modified by pointer arithmetic = the beginning of a memory region), return
+/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr.
+///
+/// This helper function is used by two separate heuristics that are only valid
+/// in these "clean" cases; `SizeUnit::forExpr` is one of its callers.
+///
+/// \param E the expression to inspect.
+/// \param C the checker context in which `E` is being evaluated.
+/// \returns `E` as an ArraySubscriptExpr when it is "clean", nullptr otherwise.
+const ArraySubscriptExpr *getAsCleanArraySubscriptExpr(const Expr *E,
+ const CheckerContext &C);
+
+/// The unit in which offsets and extents are expressed by the bounds checking
+/// messages: either plain bytes or elements of one concrete type.
+class SizeUnit {
+  /// Element type of the unit; a null QualType denotes "bytes".
+  QualType AsType;
+  /// Size of one unit in char units (1 when the unit is bytes).
+  int64_t AsCharUnits;
+
+  /// Construct the "bytes" unit; only reachable through `SizeUnit::bytes()`.
+  SizeUnit() : AsType(), AsCharUnits(1) {}
+
+public:
+  /// Construct a unit that measures sizes in elements of type `T`.
+  /// `T` must not be null.
+  SizeUnit(QualType T, const ASTContext &ACtx) : AsType(T), AsCharUnits(1) {
+    // Check the precondition before asking for the size of `T`, so that a
+    // null type is reported by this assertion and not by the size query.
+    assert(!T.isNull());
+    AsCharUnits = ACtx.getTypeSizeInChars(T).getQuantity();
+  }
+
+  /// Return the unit that measures sizes in bytes.
+  static SizeUnit bytes() { return SizeUnit(); }
+
+  /// Return true if this unit is "bytes" (i.e. it has no element type).
+  bool isBytes() const { return AsType.isNull(); }
+
+  /// If `E` is a "clean" array subscript expression, return the type of the
+  /// accessed element; otherwise return 'Bytes' because that's the best (or
+  /// least bad) option for the assumption messages that use this.
+  static SizeUnit forExpr(const Expr *E, const CheckerContext &C) {
+    const auto *ASE = getAsCleanArraySubscriptExpr(E, C);
+    if (!ASE)
+      return bytes();
+
+    return SizeUnit(ASE->getType(), C.getASTContext());
+  }
+
+  /// Return the element type that is "natural" for reporting out-of-bounds
+  /// memory access to 'Location'.
+  /// `Location` must refer to an ElementRegion.
+  /// FIXME: It is unfortunate that this heuristic differs from the heuristic
+  /// used for reporting assumption (`SizeUnit::forExpr`).
+  static SizeUnit forSVal(SVal Location, const ASTContext &ACtx) {
+    // `getAsRegion()` may return nullptr; check it separately so that the
+    // precondition is caught by an assertion and not by a null dereference.
+    const MemRegion *Reg = Location.getAsRegion();
+    assert(Reg && "location is not a memory region");
+    const auto *EReg = Reg->getAs<ElementRegion>();
+    assert(EReg && "this checker only handles element access");
+    return SizeUnit(EReg->getElementType(), ACtx);
+  }
+
+  /// Return the size of one unit in char units.
+  int64_t asCharUnits() const { return AsCharUnits; }
+
+  /// Return the phrase that introduces the extent of a region measured in
+  /// this unit.
+  std::string asExtentDesc() const {
+    if (isBytes())
+      return "the extent of";
+    return llvm::formatv("the number of '{0}' elements in",
+                         AsType.getAsString());
+  }
+
+  /// Return the singular noun phrase that names one unit.
+  std::string asElementName() const {
+    if (isBytes())
+      return "byte";
+    return llvm::formatv("'{0}' element", AsType.getAsString());
+  }
+
+  /// Return the noun phrase that names an offset measured in this unit.
+  std::string getOffsetName() const {
+    return isBytes() ? "byte offset" : "index";
+  }
+
+  /// Try to divide `Val1` and `Val2` (in place) by `this->asCharUnits()` and
+  /// return true if it can be performed without remainder. The values `Val1`
+  /// and `Val2` may be nullopt and in that case the corresponding division is
+  /// considered to be successful.
+  bool tryConvertValuesFromBytes(std::optional<int64_t> &Val1,
+                                 std::optional<int64_t> &Val2) const;
+};
+
+/// A pair of message strings, `Short` and `Full`.
+/// NOTE(review): presumably the brief and the detailed wording of the same
+/// diagnostic -- confirm against the code that fills this struct.
+struct Messages {
+ std::string Short, Full;
+};
+
+/// The way in which an offset can be out of bounds. The enumerators are used
+/// as zero-based indices into `Adjectives`, so `Indeterminate` must remain
+/// the last one and the two lists must stay in the same order.
+enum class BadOffsetKind { Negative, Overflowing, Indeterminate };
+
+/// Adjective phrases (with indefinite article) that describe a bad offset,
+/// indexed by the underlying value of `BadOffsetKind`.
+inline constexpr llvm::StringLiteral Adjectives[] = {
+    "a negative", "an overflowing", "a negative or overflowing"};
+static_assert(sizeof(Adjectives) / sizeof(Adjectives[0]) ==
+                  static_cast<unsigned>(BadOffsetKind::Indeterminate) + 1,
+              "Adjectives must have exactly one entry per BadOffsetKind");
+
+/// Return the adjective phrase that corresponds to `Problem`.
+inline StringRef asAdjective(BadOffsetKind Problem) {
+  const auto Index = static_cast<unsigned>(Problem);
+  // Only an explicit cast can produce a value outside the enumerators, but
+  // catch that here so it cannot read past the end of the table unnoticed.
+  assert(Index < sizeof(Adjectives) / sizeof(Adjectives[0]) &&
+         "invalid BadOffsetKind");
+  return Adjectives[Index];
+}
----------------
NagyDonat wrote:
> Currently, it will silently overflow when passing `Indeterminate`.
No, it does not overflow, `Indeterminate` is equal to 2.
For me it was a traditional pattern that `enum {A, B, C};` and `string Array[] = {"A", "B", "C"};` are in a natural correspondence (i.e. `Array[A] == "A"`, `Array[B] == "B"` and `Array[C] == "C"`) because everything is zero-based. Also, I felt that the `enum class` is enough of a guarantee to rule out overflows (only an explicitly fishy cast could create an overflowing `BadOffsetKind`).
However, I could switch to another way of defining this enum -> string mapping if you think that this is confusing.
What would be your preferred solution for introducing this sort of mapping? Just using `std::array` or some other pattern (e.g. perhaps a switch?).
https://github.com/llvm/llvm-project/pull/202372
More information about the cfe-commits
mailing list