[clang] [analyzer] Enforce not making overly complicated symbols (PR #144327)
Balázs Benics via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 27 01:55:18 PDT 2025
https://github.com/balazs-benics-sonarsource updated https://github.com/llvm/llvm-project/pull/144327
>From bc7dfc2b4f143c2caa1189db096bf9e4ea76f053 Mon Sep 17 00:00:00 2001
From: Balazs Benics <balazs.benics at sonarsource.com>
Date: Mon, 16 Jun 2025 12:14:06 +0200
Subject: [PATCH 1/2] [analyzer] Enforce not making overly complicated symbols
CPP-6182
---
.../Core/PathSensitive/SValBuilder.h | 8 +-
.../Core/PathSensitive/SymExpr.h | 19 +--
.../Core/PathSensitive/SymbolManager.h | 127 +++++++++++++-----
.../Core/PathSensitive/Symbols.def | 1 +
clang/lib/StaticAnalyzer/Checkers/Taint.cpp | 2 +-
.../Checkers/TrustNonnullChecker.cpp | 2 +-
clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 2 +-
.../Core/RangeConstraintManager.cpp | 11 +-
clang/lib/StaticAnalyzer/Core/SValBuilder.cpp | 43 +++---
.../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 6 +-
.../lib/StaticAnalyzer/Core/SymbolManager.cpp | 13 ++
.../ensure-capped-symbol-complexity.cpp | 58 ++++++++
12 files changed, 211 insertions(+), 81 deletions(-)
create mode 100644 clang/test/Analysis/ensure-capped-symbol-complexity.cpp
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index 2911554de9d97..fae78c564e0ab 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -57,6 +57,8 @@ class SValBuilder {
protected:
ASTContext &Context;
+ const AnalyzerOptions &AnOpts;
+
/// Manager of APSInt values.
BasicValueFactory BasicVals;
@@ -68,8 +70,6 @@ class SValBuilder {
ProgramStateManager &StateMgr;
- const AnalyzerOptions &AnOpts;
-
/// The scalar type to use for array indices.
const QualType ArrayIndexTy;
@@ -326,8 +326,8 @@ class SValBuilder {
nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op,
const SymExpr *rhs, QualType type);
- NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
- QualType type);
+ nonloc::SymbolVal makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
+ QualType type);
/// Create a NonLoc value for cast.
nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
index 6233a22d2ca2b..11d0a22a31c46 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -51,9 +51,11 @@ class SymExpr : public llvm::FoldingSetNode {
/// Note, however, that it can't be used in Profile because SymbolManager
/// needs to compute Profile before allocating SymExpr.
const SymbolID Sym;
+ const unsigned Complexity;
protected:
- SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
+ SymExpr(Kind k, SymbolID Sym, unsigned Complexity)
+ : K(k), Sym(Sym), Complexity(Complexity) {}
static bool isValidTypeForSymbol(QualType T) {
// FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -61,8 +63,6 @@ class SymExpr : public llvm::FoldingSetNode {
return !T.isNull() && !T->isVoidType();
}
- mutable unsigned Complexity = 0;
-
public:
virtual ~SymExpr() = default;
@@ -108,7 +108,7 @@ class SymExpr : public llvm::FoldingSetNode {
return llvm::make_range(symbol_iterator(this), symbol_iterator());
}
- virtual unsigned computeComplexity() const = 0;
+ unsigned complexity() const { return Complexity; }
/// Find the region from which this symbol originates.
///
@@ -136,10 +136,15 @@ using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
/// A symbol representing data which can be stored in a memory location
/// (region).
class SymbolData : public SymExpr {
+ friend class SymbolManager;
void anchor() override;
protected:
- SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
+ SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym, computeComplexity()) {
+ assert(classof(this));
+ }
+
+ static unsigned computeComplexity(...) { return 1; }
public:
~SymbolData() override = default;
@@ -147,10 +152,6 @@ class SymbolData : public SymExpr {
/// Get a string representation of the kind of the region.
virtual StringRef getKindStr() const = 0;
- unsigned computeComplexity() const override {
- return 1;
- };
-
// Implement isa<T> support.
static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
static constexpr bool classof(Kind K) {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 7af86cd721e8e..9c27669a61ba9 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -15,9 +15,11 @@
#define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
#include "clang/AST/Expr.h"
+#include "clang/AST/OperationKinds.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
@@ -26,9 +28,11 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
+#include <type_traits>
namespace clang {
@@ -133,6 +137,47 @@ class SymbolConjured : public SymbolData {
static constexpr bool classof(Kind K) { return K == ClassKind; }
};
+/// A symbol representing the result of an expression that became too
+/// complicated. In other words, its complexity surpassed the
+/// MaxSymbolComplexity threshold.
+/// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint
+/// info to it.
+class SymbolOverlyComplex final : public SymbolData {
+ const SymExpr *OverlyComplicatedSymbol;
+
+ friend class SymExprAllocator;
+
+ SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
+ : SymbolData(ClassKind, Sym),
+ OverlyComplicatedSymbol(OverlyComplicatedSymbol) {
+ assert(OverlyComplicatedSymbol);
+ }
+
+public:
+ QualType getType() const override {
+ return OverlyComplicatedSymbol->getType();
+ }
+
+ StringRef getKindStr() const override;
+
+ void dumpToStream(raw_ostream &os) const override;
+
+ static void Profile(llvm::FoldingSetNodeID &profile,
+ const SymExpr *OverlyComplicatedSymbol) {
+ profile.AddInteger((unsigned)ClassKind);
+ profile.AddPointer(OverlyComplicatedSymbol);
+ }
+
+ void Profile(llvm::FoldingSetNodeID &profile) override {
+ Profile(profile, OverlyComplicatedSymbol);
+ }
+
+ // Implement isa<T> support.
+ static constexpr Kind ClassKind = SymbolOverlyComplexKind;
+ static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
+ static constexpr bool classof(Kind K) { return K == ClassKind; }
+};
+
/// A symbol representing the value of a MemRegion whose parent region has
/// symbolic value.
class SymbolDerived : public SymbolData {
@@ -285,6 +330,7 @@ class SymbolMetadata : public SymbolData {
/// Represents a cast expression.
class SymbolCast : public SymExpr {
+ friend class SymbolManager;
const SymExpr *Operand;
/// Type of the operand.
@@ -295,20 +341,19 @@ class SymbolCast : public SymExpr {
friend class SymExprAllocator;
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
- : SymExpr(ClassKind, Sym), Operand(In), FromTy(From), ToTy(To) {
+ : SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
+ FromTy(From), ToTy(To) {
assert(In);
assert(isValidTypeForSymbol(From));
// FIXME: GenericTaintChecker creates symbols of void type.
// Otherwise, 'To' should also be a valid type.
}
-public:
- unsigned computeComplexity() const override {
- if (Complexity == 0)
- Complexity = 1 + Operand->computeComplexity();
- return Complexity;
+ static unsigned computeComplexity(const SymExpr *In, QualType, QualType) {
+ return In->complexity() + 1;
}
+public:
QualType getType() const override { return ToTy; }
LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -336,6 +381,7 @@ class SymbolCast : public SymExpr {
/// Represents a symbolic expression involving a unary operator.
class UnarySymExpr : public SymExpr {
+ friend class SymbolManager;
const SymExpr *Operand;
UnaryOperator::Opcode Op;
QualType T;
@@ -343,7 +389,8 @@ class UnarySymExpr : public SymExpr {
friend class SymExprAllocator;
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
QualType T)
- : SymExpr(ClassKind, Sym), Operand(In), Op(Op), T(T) {
+ : SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
+ Op(Op), T(T) {
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
// modeled as x + 1.
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -354,13 +401,12 @@ class UnarySymExpr : public SymExpr {
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
}
-public:
- unsigned computeComplexity() const override {
- if (Complexity == 0)
- Complexity = 1 + Operand->computeComplexity();
- return Complexity;
+ static unsigned computeComplexity(const SymExpr *In, UnaryOperator::Opcode,
+ QualType) {
+ return In->complexity() + 1;
}
+public:
const SymExpr *getOperand() const { return Operand; }
UnaryOperator::Opcode getOpcode() const { return Op; }
QualType getType() const override { return T; }
@@ -391,8 +437,9 @@ class BinarySymExpr : public SymExpr {
QualType T;
protected:
- BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
- : SymExpr(k, Sym), Op(op), T(t) {
+ BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t,
+ unsigned Complexity)
+ : SymExpr(k, Sym, Complexity), Op(op), T(t) {
assert(classof(this));
// Binary expressions are results of arithmetic. Pointer arithmetic is not
// handled by binary expressions, but it is instead handled by applying
@@ -415,7 +462,7 @@ class BinarySymExpr : public SymExpr {
protected:
static unsigned computeOperandComplexity(const SymExpr *Value) {
- return Value->computeComplexity();
+ return Value->complexity();
}
static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
return 1;
@@ -432,17 +479,26 @@ class BinarySymExpr : public SymExpr {
/// Template implementation for all binary symbolic expressions
template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassK>
class BinarySymExprImpl : public BinarySymExpr {
+ friend class SymbolManager;
LHSTYPE LHS;
RHSTYPE RHS;
friend class SymExprAllocator;
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
RHSTYPE rhs, QualType t)
- : BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
+ : BinarySymExpr(Sym, ClassKind, op, t,
+ computeComplexity(lhs, op, rhs, t)),
+ LHS(lhs), RHS(rhs) {
assert(getPointer(lhs));
assert(getPointer(rhs));
}
+ static unsigned computeComplexity(LHSTYPE lhs, BinaryOperator::Opcode,
+ RHSTYPE rhs, QualType) {
+ // FIXME: Should we add 1 to complexity?
+ return computeOperandComplexity(lhs) + computeOperandComplexity(rhs);
+ }
+
public:
void dumpToStream(raw_ostream &os) const override {
dumpToStreamImpl(os, LHS);
@@ -453,13 +509,6 @@ class BinarySymExprImpl : public BinarySymExpr {
LHSTYPE getLHS() const { return LHS; }
RHSTYPE getRHS() const { return RHS; }
- unsigned computeComplexity() const override {
- if (Complexity == 0)
- Complexity =
- computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
- return Complexity;
- }
-
static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
ID.AddInteger((unsigned)ClassKind);
@@ -520,27 +569,30 @@ class SymbolManager {
SymExprAllocator Alloc;
BasicValueFactory &BV;
ASTContext &Ctx;
+ const unsigned MaxCompComplexity;
public:
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
- llvm::BumpPtrAllocator &bpalloc)
- : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
+ llvm::BumpPtrAllocator &bpalloc, const AnalyzerOptions &Opts)
+ : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx),
+ MaxCompComplexity(Opts.MaxSymbolComplexity) {
+ assert(MaxCompComplexity > 0 && "Zero max complexity doesn't make sense");
+ }
static bool canSymbolicate(QualType T);
- /// Create or retrieve a SymExpr of type \p SymExprT for the given arguments.
+ /// Create or retrieve a SymExpr of type \p T for the given arguments.
/// Use the arguments to check for an existing SymExpr and return it,
/// otherwise, create a new one and keep a pointer to it to avoid duplicates.
- template <typename SymExprT, typename... Args>
- const SymExprT *acquire(Args &&...args);
+ template <typename T, typename... Args,
+ typename Ret = std::conditional_t<SymbolData::classof(T::ClassKind),
+ T, SymExpr>>
+ LLVM_ATTRIBUTE_RETURNS_NONNULL const Ret *acquire(Args &&...args);
const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem,
const LocationContext *LCtx, QualType T,
unsigned VisitCount,
- const void *SymbolTag = nullptr) {
-
- return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
- }
+ const void *SymbolTag = nullptr);
QualType getType(const SymExpr *SE) const {
return SE->getType();
@@ -673,8 +725,9 @@ class SymbolVisitor {
virtual bool VisitMemRegion(const MemRegion *) { return true; }
};
-template <typename T, typename... Args>
-const T *SymbolManager::acquire(Args &&...args) {
+// Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
+template <typename T, typename... Args, typename Ret>
+const Ret *SymbolManager::acquire(Args &&...args) {
llvm::FoldingSetNodeID profile;
T::Profile(profile, args...);
void *InsertPos;
@@ -682,8 +735,12 @@ const T *SymbolManager::acquire(Args &&...args) {
if (!SD) {
SD = Alloc.make<T>(std::forward<Args>(args)...);
DataSet.InsertNode(SD, InsertPos);
+ if (SD->complexity() > MaxCompComplexity) {
+ return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
+ }
}
- return cast<T>(SD);
+
+ return cast<Ret>(SD);
}
} // namespace ento
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
index b93f8e2501559..1c96e05f05b7d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
@@ -45,6 +45,7 @@ SYMBOL(SymbolCast, SymExpr)
ABSTRACT_SYMBOL(SymbolData, SymExpr)
SYMBOL(SymbolConjured, SymbolData)
+ SYMBOL(SymbolOverlyComplex, SymbolData)
SYMBOL(SymbolDerived, SymbolData)
SYMBOL(SymbolExtent, SymbolData)
SYMBOL(SymbolMetadata, SymbolData)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index e55d064253b84..f0dc889f15e7a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -267,7 +267,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
- Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
+ Sym->complexity() > Opts.MaxTaintedSymbolComplexity) {
return {};
}
diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
index e2f8bd541c967..ab0e3d8f56d86 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
@@ -66,7 +66,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
SVal Cond,
bool Assumption) const {
const SymbolRef CondS = Cond.getAsSymbol();
- if (!CondS || CondS->computeComplexity() > ComplexityThreshold)
+ if (!CondS || CondS->complexity() > ComplexityThreshold)
return State;
for (SymbolRef Antecedent : CondS->symbols()) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index fa8e669b6bb2f..3486485dcd686 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -67,7 +67,7 @@ void ExprEngine::VisitBinaryOperator(const BinaryOperator* B,
if (RightV.isUnknown()) {
unsigned Count = currBldrCtx->blockCount();
RightV = svalBuilder.conjureSymbolVal(nullptr, getCFGElementRef(), LCtx,
- Count);
+ RHS->getType(), Count);
}
// Simulate the effects of a "store": bind the value of the RHS
// to the L-Value represented by the LHS.
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index ab45e678bafd5..0e84a7046ad08 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -1264,7 +1264,9 @@ class SymbolicRangeInferrer
private:
SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
- : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
+ : SVB(S->getStateManager().getSValBuilder()),
+ SymMgr(S->getSymbolManager()), ValueFactory(F.getValueFactory()),
+ RangeFactory(F), State(S) {}
/// Infer range information from the given integer constant.
///
@@ -1525,8 +1527,6 @@ class SymbolicRangeInferrer
const SymExpr *RHS = SSE->getRHS();
QualType T = SSE->getType();
- SymbolManager &SymMgr = State->getSymbolManager();
-
// We use this variable to store the last queried operator (`QueriedOP`)
// for which the `getCmpOpState` returned with `Unknown`. If there are two
// different OPs that returned `Unknown` then we have to query the special
@@ -1540,8 +1540,7 @@ class SymbolicRangeInferrer
// Let's find an expression e.g. (x < y).
BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
- const SymSymExpr *SymSym =
- SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T);
+ SymbolRef SymSym = SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T);
const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
// If ranges were not previously found,
@@ -1622,6 +1621,8 @@ class SymbolicRangeInferrer
return RangeSet(RangeFactory, Zero);
}
+ SValBuilder &SVB;
+ SymbolManager &SymMgr;
BasicValueFactory &ValueFactory;
RangeSet::Factory &RangeFactory;
ProgramStateRef State;
diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 2276c452cce76..a0f959d8adc1a 100644
--- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -51,11 +51,11 @@ void SValBuilder::anchor() {}
SValBuilder::SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
ProgramStateManager &stateMgr)
- : Context(context), BasicVals(context, alloc),
- SymMgr(context, BasicVals, alloc), MemMgr(context, alloc),
- StateMgr(stateMgr),
+ : Context(context),
AnOpts(
stateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions()),
+ BasicVals(context, alloc), SymMgr(context, BasicVals, alloc, AnOpts),
+ MemMgr(context, alloc), StateMgr(stateMgr),
ArrayIndexTy(context.LongLongTy),
ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
@@ -77,6 +77,9 @@ DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) {
nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs,
BinaryOperator::Opcode op,
APSIntPtr rhs, QualType type) {
+ // The Environment ensures we always get a persistent APSInt in
+ // BasicValueFactory, so we don't need to get the APSInt from
+ // BasicValueFactory again.
assert(lhs);
assert(!Loc::isLocType(type));
return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type));
@@ -98,8 +101,9 @@ nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs,
return nonloc::SymbolVal(SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type));
}
-NonLoc SValBuilder::makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
- QualType type) {
+nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand,
+ UnaryOperator::Opcode op,
+ QualType type) {
assert(operand);
assert(!Loc::isLocType(type));
return nonloc::SymbolVal(SymMgr.acquire<UnarySymExpr>(operand, op, type));
@@ -432,24 +436,17 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op,
QualType ResultTy) {
SymbolRef symLHS = LHS.getAsSymbol();
SymbolRef symRHS = RHS.getAsSymbol();
+ auto lInt = LHS.getAs<nonloc::ConcreteInt>();
+ auto rInt = RHS.getAs<nonloc::ConcreteInt>();
- // TODO: When the Max Complexity is reached, we should conjure a symbol
- // instead of generating an Unknown value and propagate the taint info to it.
- const unsigned MaxComp = AnOpts.MaxSymbolComplexity;
-
- if (symLHS && symRHS &&
- (symLHS->computeComplexity() + symRHS->computeComplexity()) < MaxComp)
+ if (symLHS && symRHS)
return makeNonLoc(symLHS, Op, symRHS, ResultTy);
- if (symLHS && symLHS->computeComplexity() < MaxComp)
- if (std::optional<nonloc::ConcreteInt> rInt =
- RHS.getAs<nonloc::ConcreteInt>())
- return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy);
+ if (symLHS && rInt)
+ return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy);
- if (symRHS && symRHS->computeComplexity() < MaxComp)
- if (std::optional<nonloc::ConcreteInt> lInt =
- LHS.getAs<nonloc::ConcreteInt>())
- return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy);
+ if (lInt && symRHS)
+ return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy);
return UnknownVal();
}
@@ -614,10 +611,12 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val,
// Check the range of the symbol being casted against the maximum value of the
// target type.
QualType CmpTy = getConditionType();
- NonLoc CompVal = evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy)
- .castAs<NonLoc>();
+ auto CompVal =
+ evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy).getAs<NonLoc>();
+ if (!CompVal)
+ return UnknownVal();
ProgramStateRef IsNotTruncated, IsTruncated;
- std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal);
+ std::tie(IsNotTruncated, IsTruncated) = state->assume(*CompVal);
if (!IsNotTruncated && IsTruncated) {
// Symbol is truncated so we evaluate it as a cast.
return makeNonLoc(AsSymbol, originalTy, castTy);
diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 84a9c43d3572e..ef8c3b6b4bdac 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -291,9 +291,9 @@ static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym,
// same signed integral type and no overflows occur (which should be checked
// by the caller).
static NonLoc doRearrangeUnchecked(ProgramStateRef State,
- BinaryOperator::Opcode Op,
- SymbolRef LSym, llvm::APSInt LInt,
- SymbolRef RSym, llvm::APSInt RInt) {
+ BinaryOperator::Opcode Op, SymbolRef LSym,
+ llvm::APSInt LInt, SymbolRef RSym,
+ llvm::APSInt RInt) {
SValBuilder &SVB = State->getStateManager().getSValBuilder();
BasicValueFactory &BV = SVB.getBasicValueFactory();
SymbolManager &SymMgr = SVB.getSymbolManager();
diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index d03f47fa301e6..2149413189dcd 100644
--- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -32,6 +32,7 @@ using namespace ento;
void SymExpr::anchor() {}
StringRef SymbolConjured::getKindStr() const { return "conj_$"; }
+StringRef SymbolOverlyComplex::getKindStr() const { return "complex_$"; }
StringRef SymbolDerived::getKindStr() const { return "derived_$"; }
StringRef SymbolExtent::getKindStr() const { return "extent_$"; }
StringRef SymbolMetadata::getKindStr() const { return "meta_$"; }
@@ -128,6 +129,10 @@ void SymbolConjured::dumpToStream(raw_ostream &os) const {
os << ", #" << Count << '}';
}
+void SymbolOverlyComplex::dumpToStream(raw_ostream &os) const {
+ os << getKindStr() << getSymbolID();
+}
+
void SymbolDerived::dumpToStream(raw_ostream &os) const {
os << getKindStr() << getSymbolID() << '{' << getParentSymbol() << ','
<< getRegion() << '}';
@@ -176,6 +181,7 @@ void SymExpr::symbol_iterator::expand() {
switch (SE->getKind()) {
case SymExpr::SymbolRegionValueKind:
case SymExpr::SymbolConjuredKind:
+ case SymExpr::SymbolOverlyComplexKind:
case SymExpr::SymbolDerivedKind:
case SymExpr::SymbolExtentKind:
case SymExpr::SymbolMetadataKind:
@@ -238,6 +244,12 @@ bool SymbolManager::canSymbolicate(QualType T) {
return false;
}
+const SymbolConjured *SymbolManager::conjureSymbol(
+ ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T,
+ unsigned VisitCount, const void *SymbolTag /*=nullptr*/) {
+ return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
+}
+
void SymbolManager::addSymbolDependency(const SymbolRef Primary,
const SymbolRef Dependent) {
auto &dependencies = SymbolDependencies[Primary];
@@ -346,6 +358,7 @@ bool SymbolReaper::isLive(SymbolRef sym) {
KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion());
break;
case SymExpr::SymbolConjuredKind:
+ case SymExpr::SymbolOverlyComplexKind:
KnownLive = false;
break;
case SymExpr::SymbolDerivedKind:
diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
new file mode 100644
index 0000000000000..bd076f2709422
--- /dev/null
+++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection %s -verify
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ConfigDumper 2>&1 | FileCheck %s --match-full-lines
+// CHECK: max-symbol-complexity = 35
+
+void clang_analyzer_dump(int v);
+
+void pumpSymbolComplexity() {
+ extern int *p;
+ *p = (*p + 1) & 1023; // 2
+ *p = (*p + 1) & 1023; // 4
+ *p = (*p + 1) & 1023; // 6
+ *p = (*p + 1) & 1023; // 8
+ *p = (*p + 1) & 1023; // 10
+ *p = (*p + 1) & 1023; // 12
+ *p = (*p + 1) & 1023; // 14
+ *p = (*p + 1) & 1023; // 16
+ *p = (*p + 1) & 1023; // 18
+ *p = (*p + 1) & 1023; // 20
+ *p = (*p + 1) & 1023; // 22
+ *p = (*p + 1) & 1023; // 24
+ *p = (*p + 1) & 1023; // 26
+ *p = (*p + 1) & 1023; // 28
+ *p = (*p + 1) & 1023; // 30
+ *p = (*p + 1) & 1023; // 32
+ *p = (*p + 1) & 1023; // 34
+
+ // The complexity of "*p" is below 35, so it's accurate.
+ clang_analyzer_dump(*p);
+ // expected-warning-re at -1 {{{{^\({34}reg}}}}
+
+ // We would increase the complexity over the threshold, thus it'll get simplified.
+ *p = (*p + 1) & 1023; // Would be 36, which is over 35.
+
+ // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify.
+ clang_analyzer_dump(*p);
+ // expected-warning-re at -1 {{{{^}}(complex_${{[0-9]+}}) & 1023}} [debug.ExprInspection]{{$}}}}
+}
+
+void hugelyOverComplicatedSymbol() {
+#define TEN_TIMES(x) x x x x x x x x x x
+#define HUNDRED_TIMES(x) TEN_TIMES(TEN_TIMES(x))
+ extern int *p;
+ HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+ HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+ HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+ HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+ *p = (*p + 1) & 1023;
+ *p = (*p + 1) & 1023;
+ *p = (*p + 1) & 1023;
+ *p = (*p + 1) & 1023;
+
+ // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify.
+ clang_analyzer_dump(*p);
+ // expected-warning-re at -1 {{{{^}}(((complex_${{[0-9]+}}) & 1023) + 1) & 1023 [debug.ExprInspection]{{$}}}}
+#undef HUNDRED_TIMES
+#undef TEN_TIMES
+}
>From 8c29bbc6c309efe0a1d6af948f95dbb9654971b8 Mon Sep 17 00:00:00 2001
From: Balazs Benics <balazs.benics at sonarsource.com>
Date: Fri, 27 Jun 2025 08:06:16 +0200
Subject: [PATCH 2/2] Fix regression (mostly)
---
.../Core/PathSensitive/SymbolManager.h | 47 +++++++++++++++----
.../ensure-capped-symbol-complexity.cpp | 35 ++++++++++++++
2 files changed, 73 insertions(+), 9 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 9c27669a61ba9..6a0f4ac528a5e 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -49,6 +49,7 @@ class SymbolRegionValue : public SymbolData {
const TypedValueRegion *R;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
: SymbolData(ClassKind, sym), R(r) {
assert(r);
@@ -91,6 +92,7 @@ class SymbolConjured : public SymbolData {
const void *SymbolTag;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolConjured(SymbolID sym, ConstCFGElementRef elem,
const LocationContext *lctx, QualType t, unsigned count,
const void *symbolTag)
@@ -146,6 +148,7 @@ class SymbolOverlyComplex final : public SymbolData {
const SymExpr *OverlyComplicatedSymbol;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
: SymbolData(ClassKind, Sym),
@@ -185,6 +188,7 @@ class SymbolDerived : public SymbolData {
const TypedValueRegion *R;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
: SymbolData(ClassKind, sym), parentSymbol(parent), R(r) {
assert(parent);
@@ -229,6 +233,7 @@ class SymbolExtent : public SymbolData {
const SubRegion *R;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolExtent(SymbolID sym, const SubRegion *r)
: SymbolData(ClassKind, sym), R(r) {
assert(r);
@@ -274,6 +279,7 @@ class SymbolMetadata : public SymbolData {
const void *Tag;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolMetadata(SymbolID sym, const MemRegion *r, const Stmt *s, QualType t,
const LocationContext *LCtx, unsigned count, const void *tag)
: SymbolData(ClassKind, sym), R(r), S(s), T(t), LCtx(LCtx), Count(count),
@@ -340,6 +346,7 @@ class SymbolCast : public SymExpr {
QualType ToTy;
friend class SymExprAllocator;
+ friend class SymbolManager;
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
: SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
FromTy(From), ToTy(To) {
@@ -387,6 +394,7 @@ class UnarySymExpr : public SymExpr {
QualType T;
friend class SymExprAllocator;
+ friend class SymbolManager;
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
QualType T)
: SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
@@ -484,6 +492,7 @@ class BinarySymExprImpl : public BinarySymExpr {
RHSTYPE RHS;
friend class SymExprAllocator;
+ friend class SymbolManager;
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
RHSTYPE rhs, QualType t)
: BinarySymExpr(Sym, ClassKind, op, t,
@@ -728,18 +737,38 @@ class SymbolVisitor {
// Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
template <typename T, typename... Args, typename Ret>
const Ret *SymbolManager::acquire(Args &&...args) {
- llvm::FoldingSetNodeID profile;
- T::Profile(profile, args...);
- void *InsertPos;
- SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
- if (!SD) {
- SD = Alloc.make<T>(std::forward<Args>(args)...);
- DataSet.InsertNode(SD, InsertPos);
- if (SD->complexity() > MaxCompComplexity) {
- return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
+ T Dummy(/*SymbolID=*/0, args...);
+ llvm::FoldingSetNodeID DummyProfile;
+ Dummy.Profile(DummyProfile);
+
+ if (Dummy.complexity() <= MaxCompComplexity) {
+ void *InsertPos;
+ SymExpr *SD = DataSet.FindNodeOrInsertPos(DummyProfile, InsertPos);
+ if (!SD) {
+ SD = Alloc.make<T>(args...);
+ DataSet.InsertNode(SD, InsertPos);
}
+ return cast<Ret>(SD);
+ }
+ void *WrappedSymInsertPos;
+ SymExpr *WrappedSym =
+ DataSet.FindNodeOrInsertPos(DummyProfile, WrappedSymInsertPos);
+ if (!WrappedSym) {
+ WrappedSym = Alloc.make<T>(args...);
+ DataSet.InsertNode(WrappedSym, WrappedSymInsertPos);
}
+ SymbolOverlyComplex OverlyComplexSym(/*SymbolID=*/0, WrappedSym);
+ llvm::FoldingSetNodeID OverlyComplexSymProfile;
+ OverlyComplexSym.Profile(OverlyComplexSymProfile);
+
+ void *OverlyComplexSymInsertPos;
+ SymExpr *SD = DataSet.FindNodeOrInsertPos(OverlyComplexSymProfile,
+ OverlyComplexSymInsertPos);
+ if (!SD) {
+ SD = Alloc.make<SymbolOverlyComplex>(WrappedSym);
+ DataSet.InsertNode(SD, OverlyComplexSymInsertPos);
+ }
return cast<Ret>(SD);
}
diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
index bd076f2709422..2e716900c3847 100644
--- a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
+++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
@@ -56,3 +56,38 @@ void hugelyOverComplicatedSymbol() {
#undef HUNDRED_TIMES
#undef TEN_TIMES
}
+
+typedef unsigned long long __attribute__((aligned((8)))) u64a;
+u64a compress64(u64a x, u64a m) {
+ if ((x & m) == 0)
+ return 0;
+ x &= m;
+ u64a mk = ~m << 1;
+ for (unsigned i = 0; i < 6; i++) {
+ u64a mp = mk ^ (mk << 1);
+ mp ^= mp << 2;
+ mp ^= mp << 4;
+ mp ^= mp << 8;
+ mp ^= mp << 16;
+ mp ^= mp << 32;
+ u64a mv = mp & m;
+ m = (m ^ mv) | (mv >> (1 << i));
+ u64a t = x & mv;
+ x = (x ^ t) | (t >> (1 << i));
+ mk = mk & ~mp;
+ }
+ return x;
+}
+void storecompressed512_64bit(u64a *m, u64a *x) {
+ u64a v[8] = {
+ compress64(x[0], m[0]),
+ compress64(x[1], m[1]),
+ compress64(x[2], m[2]),
+ compress64(x[3], m[3]),
+ compress64(x[4], m[4]),
+ compress64(x[5], m[5]),
+ compress64(x[6], m[6]),
+ compress64(x[7], m[7]),
+ };
+ (void)v;
+}
More information about the cfe-commits
mailing list