[clang] [analyzer] Enforce not making overly complicated symbols (PR #144327)

Balázs Benics via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 27 01:55:18 PDT 2025


https://github.com/balazs-benics-sonarsource updated https://github.com/llvm/llvm-project/pull/144327

>From bc7dfc2b4f143c2caa1189db096bf9e4ea76f053 Mon Sep 17 00:00:00 2001
From: Balazs Benics <balazs.benics at sonarsource.com>
Date: Mon, 16 Jun 2025 12:14:06 +0200
Subject: [PATCH 1/2] [analyzer] Enforce not making overly complicated symbols

CPP-6182
---
 .../Core/PathSensitive/SValBuilder.h          |   8 +-
 .../Core/PathSensitive/SymExpr.h              |  19 +--
 .../Core/PathSensitive/SymbolManager.h        | 127 +++++++++++++-----
 .../Core/PathSensitive/Symbols.def            |   1 +
 clang/lib/StaticAnalyzer/Checkers/Taint.cpp   |   2 +-
 .../Checkers/TrustNonnullChecker.cpp          |   2 +-
 clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp |   2 +-
 .../Core/RangeConstraintManager.cpp           |  11 +-
 clang/lib/StaticAnalyzer/Core/SValBuilder.cpp |  43 +++---
 .../StaticAnalyzer/Core/SimpleSValBuilder.cpp |   6 +-
 .../lib/StaticAnalyzer/Core/SymbolManager.cpp |  13 ++
 .../ensure-capped-symbol-complexity.cpp       |  58 ++++++++
 12 files changed, 211 insertions(+), 81 deletions(-)
 create mode 100644 clang/test/Analysis/ensure-capped-symbol-complexity.cpp

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
index 2911554de9d97..fae78c564e0ab 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h
@@ -57,6 +57,8 @@ class SValBuilder {
 protected:
   ASTContext &Context;
 
+  const AnalyzerOptions &AnOpts;
+
   /// Manager of APSInt values.
   BasicValueFactory BasicVals;
 
@@ -68,8 +70,6 @@ class SValBuilder {
 
   ProgramStateManager &StateMgr;
 
-  const AnalyzerOptions &AnOpts;
-
   /// The scalar type to use for array indices.
   const QualType ArrayIndexTy;
 
@@ -326,8 +326,8 @@ class SValBuilder {
   nonloc::SymbolVal makeNonLoc(const SymExpr *lhs, BinaryOperator::Opcode op,
                                const SymExpr *rhs, QualType type);
 
-  NonLoc makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
-                    QualType type);
+  nonloc::SymbolVal makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
+                               QualType type);
 
   /// Create a NonLoc value for cast.
   nonloc::SymbolVal makeNonLoc(const SymExpr *operand, QualType fromTy,
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
index 6233a22d2ca2b..11d0a22a31c46 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -51,9 +51,11 @@ class SymExpr : public llvm::FoldingSetNode {
   /// Note, however, that it can't be used in Profile because SymbolManager
   /// needs to compute Profile before allocating SymExpr.
   const SymbolID Sym;
+  const unsigned Complexity;
 
 protected:
-  SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
+  SymExpr(Kind k, SymbolID Sym, unsigned Complexity)
+      : K(k), Sym(Sym), Complexity(Complexity) {}
 
   static bool isValidTypeForSymbol(QualType T) {
     // FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -61,8 +63,6 @@ class SymExpr : public llvm::FoldingSetNode {
     return !T.isNull() && !T->isVoidType();
   }
 
-  mutable unsigned Complexity = 0;
-
 public:
   virtual ~SymExpr() = default;
 
@@ -108,7 +108,7 @@ class SymExpr : public llvm::FoldingSetNode {
     return llvm::make_range(symbol_iterator(this), symbol_iterator());
   }
 
-  virtual unsigned computeComplexity() const = 0;
+  unsigned complexity() const { return Complexity; }
 
   /// Find the region from which this symbol originates.
   ///
@@ -136,10 +136,15 @@ using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
 /// A symbol representing data which can be stored in a memory location
 /// (region).
 class SymbolData : public SymExpr {
+  friend class SymbolManager;
   void anchor() override;
 
 protected:
-  SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
+  SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym, computeComplexity()) {
+    assert(classof(this));
+  }
+
+  static unsigned computeComplexity(...) { return 1; }
 
 public:
   ~SymbolData() override = default;
@@ -147,10 +152,6 @@ class SymbolData : public SymExpr {
   /// Get a string representation of the kind of the region.
   virtual StringRef getKindStr() const = 0;
 
-  unsigned computeComplexity() const override {
-    return 1;
-  };
-
   // Implement isa<T> support.
   static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
   static constexpr bool classof(Kind K) {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 7af86cd721e8e..9c27669a61ba9 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -15,9 +15,11 @@
 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
 
 #include "clang/AST/Expr.h"
+#include "clang/AST/OperationKinds.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h"
@@ -26,9 +28,11 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Allocator.h"
 #include <cassert>
+#include <type_traits>
 
 namespace clang {
 
@@ -133,6 +137,47 @@ class SymbolConjured : public SymbolData {
   static constexpr bool classof(Kind K) { return K == ClassKind; }
 };
 
+/// A symbol representing the result of an expression that became too
+/// complicated. In other words, its complexity surpassed the
+/// MaxSymbolComplexity threshold.
+/// TODO: When the MaxSymbolComplexity is reached, we should propagate the taint
+/// info to it.
+class SymbolOverlyComplex final : public SymbolData {
+  const SymExpr *OverlyComplicatedSymbol;
+
+  friend class SymExprAllocator;
+
+  SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
+      : SymbolData(ClassKind, Sym),
+        OverlyComplicatedSymbol(OverlyComplicatedSymbol) {
+    assert(OverlyComplicatedSymbol);
+  }
+
+public:
+  QualType getType() const override {
+    return OverlyComplicatedSymbol->getType();
+  }
+
+  StringRef getKindStr() const override;
+
+  void dumpToStream(raw_ostream &os) const override;
+
+  static void Profile(llvm::FoldingSetNodeID &profile,
+                      const SymExpr *OverlyComplicatedSymbol) {
+    profile.AddInteger((unsigned)ClassKind);
+    profile.AddPointer(OverlyComplicatedSymbol);
+  }
+
+  void Profile(llvm::FoldingSetNodeID &profile) override {
+    Profile(profile, OverlyComplicatedSymbol);
+  }
+
+  // Implement isa<T> support.
+  static constexpr Kind ClassKind = SymbolOverlyComplexKind;
+  static bool classof(const SymExpr *SE) { return classof(SE->getKind()); }
+  static constexpr bool classof(Kind K) { return K == ClassKind; }
+};
+
 /// A symbol representing the value of a MemRegion whose parent region has
 /// symbolic value.
 class SymbolDerived : public SymbolData {
@@ -285,6 +330,7 @@ class SymbolMetadata : public SymbolData {
 
 /// Represents a cast expression.
 class SymbolCast : public SymExpr {
+  friend class SymbolManager;
   const SymExpr *Operand;
 
   /// Type of the operand.
@@ -295,20 +341,19 @@ class SymbolCast : public SymExpr {
 
   friend class SymExprAllocator;
   SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
-      : SymExpr(ClassKind, Sym), Operand(In), FromTy(From), ToTy(To) {
+      : SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
+        FromTy(From), ToTy(To) {
     assert(In);
     assert(isValidTypeForSymbol(From));
     // FIXME: GenericTaintChecker creates symbols of void type.
     // Otherwise, 'To' should also be a valid type.
   }
 
-public:
-  unsigned computeComplexity() const override {
-    if (Complexity == 0)
-      Complexity = 1 + Operand->computeComplexity();
-    return Complexity;
+  static unsigned computeComplexity(const SymExpr *In, QualType, QualType) {
+    return In->complexity() + 1;
   }
 
+public:
   QualType getType() const override { return ToTy; }
 
   LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -336,6 +381,7 @@ class SymbolCast : public SymExpr {
 
 /// Represents a symbolic expression involving a unary operator.
 class UnarySymExpr : public SymExpr {
+  friend class SymbolManager;
   const SymExpr *Operand;
   UnaryOperator::Opcode Op;
   QualType T;
@@ -343,7 +389,8 @@ class UnarySymExpr : public SymExpr {
   friend class SymExprAllocator;
   UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
                QualType T)
-      : SymExpr(ClassKind, Sym), Operand(In), Op(Op), T(T) {
+      : SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
+        Op(Op), T(T) {
     // Note, some unary operators are modeled as a binary operator. E.g. ++x is
     // modeled as x + 1.
     assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -354,13 +401,12 @@ class UnarySymExpr : public SymExpr {
     assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
   }
 
-public:
-  unsigned computeComplexity() const override {
-    if (Complexity == 0)
-      Complexity = 1 + Operand->computeComplexity();
-    return Complexity;
+  static unsigned computeComplexity(const SymExpr *In, UnaryOperator::Opcode,
+                                    QualType) {
+    return In->complexity() + 1;
   }
 
+public:
   const SymExpr *getOperand() const { return Operand; }
   UnaryOperator::Opcode getOpcode() const { return Op; }
   QualType getType() const override { return T; }
@@ -391,8 +437,9 @@ class BinarySymExpr : public SymExpr {
   QualType T;
 
 protected:
-  BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
-      : SymExpr(k, Sym), Op(op), T(t) {
+  BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t,
+                unsigned Complexity)
+      : SymExpr(k, Sym, Complexity), Op(op), T(t) {
     assert(classof(this));
     // Binary expressions are results of arithmetic. Pointer arithmetic is not
     // handled by binary expressions, but it is instead handled by applying
@@ -415,7 +462,7 @@ class BinarySymExpr : public SymExpr {
 
 protected:
   static unsigned computeOperandComplexity(const SymExpr *Value) {
-    return Value->computeComplexity();
+    return Value->complexity();
   }
   static unsigned computeOperandComplexity(const llvm::APSInt &Value) {
     return 1;
@@ -432,17 +479,26 @@ class BinarySymExpr : public SymExpr {
 /// Template implementation for all binary symbolic expressions
 template <class LHSTYPE, class RHSTYPE, SymExpr::Kind ClassK>
 class BinarySymExprImpl : public BinarySymExpr {
+  friend class SymbolManager;
   LHSTYPE LHS;
   RHSTYPE RHS;
 
   friend class SymExprAllocator;
   BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
                     RHSTYPE rhs, QualType t)
-      : BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
+      : BinarySymExpr(Sym, ClassKind, op, t,
+                      computeComplexity(lhs, op, rhs, t)),
+        LHS(lhs), RHS(rhs) {
     assert(getPointer(lhs));
     assert(getPointer(rhs));
   }
 
+  static unsigned computeComplexity(LHSTYPE lhs, BinaryOperator::Opcode,
+                                    RHSTYPE rhs, QualType) {
+    // FIXME: Should we add 1 to complexity?
+    return computeOperandComplexity(lhs) + computeOperandComplexity(rhs);
+  }
+
 public:
   void dumpToStream(raw_ostream &os) const override {
     dumpToStreamImpl(os, LHS);
@@ -453,13 +509,6 @@ class BinarySymExprImpl : public BinarySymExpr {
   LHSTYPE getLHS() const { return LHS; }
   RHSTYPE getRHS() const { return RHS; }
 
-  unsigned computeComplexity() const override {
-    if (Complexity == 0)
-      Complexity =
-          computeOperandComplexity(RHS) + computeOperandComplexity(LHS);
-    return Complexity;
-  }
-
   static void Profile(llvm::FoldingSetNodeID &ID, LHSTYPE lhs,
                       BinaryOperator::Opcode op, RHSTYPE rhs, QualType t) {
     ID.AddInteger((unsigned)ClassKind);
@@ -520,27 +569,30 @@ class SymbolManager {
   SymExprAllocator Alloc;
   BasicValueFactory &BV;
   ASTContext &Ctx;
+  const unsigned MaxCompComplexity;
 
 public:
   SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
-                llvm::BumpPtrAllocator &bpalloc)
-      : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
+                llvm::BumpPtrAllocator &bpalloc, const AnalyzerOptions &Opts)
+      : SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx),
+        MaxCompComplexity(Opts.MaxSymbolComplexity) {
+    assert(MaxCompComplexity > 0 && "Zero max complexity doesn't make sense");
+  }
 
   static bool canSymbolicate(QualType T);
 
-  /// Create or retrieve a SymExpr of type \p SymExprT for the given arguments.
+  /// Create or retrieve a SymExpr of type \p T for the given arguments.
   /// Use the arguments to check for an existing SymExpr and return it,
   /// otherwise, create a new one and keep a pointer to it to avoid duplicates.
-  template <typename SymExprT, typename... Args>
-  const SymExprT *acquire(Args &&...args);
+  template <typename T, typename... Args,
+            typename Ret = std::conditional_t<SymbolData::classof(T::ClassKind),
+                                              T, SymExpr>>
+  LLVM_ATTRIBUTE_RETURNS_NONNULL const Ret *acquire(Args &&...args);
 
   const SymbolConjured *conjureSymbol(ConstCFGElementRef Elem,
                                       const LocationContext *LCtx, QualType T,
                                       unsigned VisitCount,
-                                      const void *SymbolTag = nullptr) {
-
-    return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
-  }
+                                      const void *SymbolTag = nullptr);
 
   QualType getType(const SymExpr *SE) const {
     return SE->getType();
@@ -673,8 +725,9 @@ class SymbolVisitor {
   virtual bool VisitMemRegion(const MemRegion *) { return true; }
 };
 
-template <typename T, typename... Args>
-const T *SymbolManager::acquire(Args &&...args) {
+// Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
+template <typename T, typename... Args, typename Ret>
+const Ret *SymbolManager::acquire(Args &&...args) {
   llvm::FoldingSetNodeID profile;
   T::Profile(profile, args...);
   void *InsertPos;
@@ -682,8 +735,12 @@ const T *SymbolManager::acquire(Args &&...args) {
   if (!SD) {
     SD = Alloc.make<T>(std::forward<Args>(args)...);
     DataSet.InsertNode(SD, InsertPos);
+    if (SD->complexity() > MaxCompComplexity) {
+      return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
+    }
   }
-  return cast<T>(SD);
+
+  return cast<Ret>(SD);
 }
 
 } // namespace ento
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
index b93f8e2501559..1c96e05f05b7d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Symbols.def
@@ -45,6 +45,7 @@ SYMBOL(SymbolCast, SymExpr)
 
 ABSTRACT_SYMBOL(SymbolData, SymExpr)
   SYMBOL(SymbolConjured, SymbolData)
+  SYMBOL(SymbolOverlyComplex, SymbolData)
   SYMBOL(SymbolDerived, SymbolData)
   SYMBOL(SymbolExtent, SymbolData)
   SYMBOL(SymbolMetadata, SymbolData)
diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
index e55d064253b84..f0dc889f15e7a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -267,7 +267,7 @@ std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State,
 
   // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570
   if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions();
-      Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) {
+      Sym->complexity() > Opts.MaxTaintedSymbolComplexity) {
     return {};
   }
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
index e2f8bd541c967..ab0e3d8f56d86 100644
--- a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp
@@ -66,7 +66,7 @@ class TrustNonnullChecker : public Checker<check::PostCall,
                              SVal Cond,
                              bool Assumption) const {
     const SymbolRef CondS = Cond.getAsSymbol();
-    if (!CondS || CondS->computeComplexity() > ComplexityThreshold)
+    if (!CondS || CondS->complexity() > ComplexityThreshold)
       return State;
 
     for (SymbolRef Antecedent : CondS->symbols()) {
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index fa8e669b6bb2f..3486485dcd686 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -67,7 +67,7 @@ void ExprEngine::VisitBinaryOperator(const BinaryOperator* B,
       if (RightV.isUnknown()) {
         unsigned Count = currBldrCtx->blockCount();
         RightV = svalBuilder.conjureSymbolVal(nullptr, getCFGElementRef(), LCtx,
-                                              Count);
+                                              RHS->getType(), Count);
       }
       // Simulate the effects of a "store":  bind the value of the RHS
       // to the L-Value represented by the LHS.
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index ab45e678bafd5..0e84a7046ad08 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -1264,7 +1264,9 @@ class SymbolicRangeInferrer
 
 private:
   SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
-      : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
+      : SVB(S->getStateManager().getSValBuilder()),
+        SymMgr(S->getSymbolManager()), ValueFactory(F.getValueFactory()),
+        RangeFactory(F), State(S) {}
 
   /// Infer range information from the given integer constant.
   ///
@@ -1525,8 +1527,6 @@ class SymbolicRangeInferrer
     const SymExpr *RHS = SSE->getRHS();
     QualType T = SSE->getType();
 
-    SymbolManager &SymMgr = State->getSymbolManager();
-
     // We use this variable to store the last queried operator (`QueriedOP`)
     // for which the `getCmpOpState` returned with `Unknown`. If there are two
     // different OPs that returned `Unknown` then we have to query the special
@@ -1540,8 +1540,7 @@ class SymbolicRangeInferrer
 
       // Let's find an expression e.g. (x < y).
       BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
-      const SymSymExpr *SymSym =
-          SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T);
+      SymbolRef SymSym = SymMgr.acquire<SymSymExpr>(LHS, QueriedOP, RHS, T);
       const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
 
       // If ranges were not previously found,
@@ -1622,6 +1621,8 @@ class SymbolicRangeInferrer
     return RangeSet(RangeFactory, Zero);
   }
 
+  SValBuilder &SVB;
+  SymbolManager &SymMgr;
   BasicValueFactory &ValueFactory;
   RangeSet::Factory &RangeFactory;
   ProgramStateRef State;
diff --git a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 2276c452cce76..a0f959d8adc1a 100644
--- a/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -51,11 +51,11 @@ void SValBuilder::anchor() {}
 
 SValBuilder::SValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
                          ProgramStateManager &stateMgr)
-    : Context(context), BasicVals(context, alloc),
-      SymMgr(context, BasicVals, alloc), MemMgr(context, alloc),
-      StateMgr(stateMgr),
+    : Context(context),
       AnOpts(
           stateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions()),
+      BasicVals(context, alloc), SymMgr(context, BasicVals, alloc, AnOpts),
+      MemMgr(context, alloc), StateMgr(stateMgr),
       ArrayIndexTy(context.LongLongTy),
       ArrayIndexWidth(context.getTypeSize(ArrayIndexTy)) {}
 
@@ -77,6 +77,9 @@ DefinedOrUnknownSVal SValBuilder::makeZeroVal(QualType type) {
 nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs,
                                           BinaryOperator::Opcode op,
                                           APSIntPtr rhs, QualType type) {
+  // The Environment ensures we always get a persistent APSInt in
+  // BasicValueFactory, so we don't need to get the APSInt from
+  // BasicValueFactory again.
   assert(lhs);
   assert(!Loc::isLocType(type));
   return nonloc::SymbolVal(SymMgr.acquire<SymIntExpr>(lhs, op, rhs, type));
@@ -98,8 +101,9 @@ nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *lhs,
   return nonloc::SymbolVal(SymMgr.acquire<SymSymExpr>(lhs, op, rhs, type));
 }
 
-NonLoc SValBuilder::makeNonLoc(const SymExpr *operand, UnaryOperator::Opcode op,
-                               QualType type) {
+nonloc::SymbolVal SValBuilder::makeNonLoc(const SymExpr *operand,
+                                          UnaryOperator::Opcode op,
+                                          QualType type) {
   assert(operand);
   assert(!Loc::isLocType(type));
   return nonloc::SymbolVal(SymMgr.acquire<UnarySymExpr>(operand, op, type));
@@ -432,24 +436,17 @@ SVal SValBuilder::makeSymExprValNN(BinaryOperator::Opcode Op,
                                    QualType ResultTy) {
   SymbolRef symLHS = LHS.getAsSymbol();
   SymbolRef symRHS = RHS.getAsSymbol();
+  auto lInt = LHS.getAs<nonloc::ConcreteInt>();
+  auto rInt = RHS.getAs<nonloc::ConcreteInt>();
 
-  // TODO: When the Max Complexity is reached, we should conjure a symbol
-  // instead of generating an Unknown value and propagate the taint info to it.
-  const unsigned MaxComp = AnOpts.MaxSymbolComplexity;
-
-  if (symLHS && symRHS &&
-      (symLHS->computeComplexity() + symRHS->computeComplexity()) <  MaxComp)
+  if (symLHS && symRHS)
     return makeNonLoc(symLHS, Op, symRHS, ResultTy);
 
-  if (symLHS && symLHS->computeComplexity() < MaxComp)
-    if (std::optional<nonloc::ConcreteInt> rInt =
-            RHS.getAs<nonloc::ConcreteInt>())
-      return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy);
+  if (symLHS && rInt)
+    return makeNonLoc(symLHS, Op, rInt->getValue(), ResultTy);
 
-  if (symRHS && symRHS->computeComplexity() < MaxComp)
-    if (std::optional<nonloc::ConcreteInt> lInt =
-            LHS.getAs<nonloc::ConcreteInt>())
-      return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy);
+  if (lInt && symRHS)
+    return makeNonLoc(lInt->getValue(), Op, symRHS, ResultTy);
 
   return UnknownVal();
 }
@@ -614,10 +611,12 @@ SVal SValBuilder::evalIntegralCast(ProgramStateRef state, SVal val,
   // Check the range of the symbol being casted against the maximum value of the
   // target type.
   QualType CmpTy = getConditionType();
-  NonLoc CompVal = evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy)
-                       .castAs<NonLoc>();
+  auto CompVal =
+      evalBinOpNN(state, BO_LE, *AsNonLoc, ToTypeMaxVal, CmpTy).getAs<NonLoc>();
+  if (!CompVal)
+    return UnknownVal();
   ProgramStateRef IsNotTruncated, IsTruncated;
-  std::tie(IsNotTruncated, IsTruncated) = state->assume(CompVal);
+  std::tie(IsNotTruncated, IsTruncated) = state->assume(*CompVal);
   if (!IsNotTruncated && IsTruncated) {
     // Symbol is truncated so we evaluate it as a cast.
     return makeNonLoc(AsSymbol, originalTy, castTy);
diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 84a9c43d3572e..ef8c3b6b4bdac 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -291,9 +291,9 @@ static std::pair<SymbolRef, APSIntPtr> decomposeSymbol(SymbolRef Sym,
 // same signed integral type and no overflows occur (which should be checked
 // by the caller).
 static NonLoc doRearrangeUnchecked(ProgramStateRef State,
-                                   BinaryOperator::Opcode Op,
-                                   SymbolRef LSym, llvm::APSInt LInt,
-                                   SymbolRef RSym, llvm::APSInt RInt) {
+                                   BinaryOperator::Opcode Op, SymbolRef LSym,
+                                   llvm::APSInt LInt, SymbolRef RSym,
+                                   llvm::APSInt RInt) {
   SValBuilder &SVB = State->getStateManager().getSValBuilder();
   BasicValueFactory &BV = SVB.getBasicValueFactory();
   SymbolManager &SymMgr = SVB.getSymbolManager();
diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index d03f47fa301e6..2149413189dcd 100644
--- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -32,6 +32,7 @@ using namespace ento;
 void SymExpr::anchor() {}
 
 StringRef SymbolConjured::getKindStr() const { return "conj_$"; }
+StringRef SymbolOverlyComplex::getKindStr() const { return "complex_$"; }
 StringRef SymbolDerived::getKindStr() const { return "derived_$"; }
 StringRef SymbolExtent::getKindStr() const { return "extent_$"; }
 StringRef SymbolMetadata::getKindStr() const { return "meta_$"; }
@@ -128,6 +129,10 @@ void SymbolConjured::dumpToStream(raw_ostream &os) const {
   os << ", #" << Count << '}';
 }
 
+void SymbolOverlyComplex::dumpToStream(raw_ostream &os) const {
+  os << getKindStr() << getSymbolID();
+}
+
 void SymbolDerived::dumpToStream(raw_ostream &os) const {
   os << getKindStr() << getSymbolID() << '{' << getParentSymbol() << ','
      << getRegion() << '}';
@@ -176,6 +181,7 @@ void SymExpr::symbol_iterator::expand() {
   switch (SE->getKind()) {
     case SymExpr::SymbolRegionValueKind:
     case SymExpr::SymbolConjuredKind:
+    case SymExpr::SymbolOverlyComplexKind:
     case SymExpr::SymbolDerivedKind:
     case SymExpr::SymbolExtentKind:
     case SymExpr::SymbolMetadataKind:
@@ -238,6 +244,12 @@ bool SymbolManager::canSymbolicate(QualType T) {
   return false;
 }
 
+const SymbolConjured *SymbolManager::conjureSymbol(
+    ConstCFGElementRef Elem, const LocationContext *LCtx, QualType T,
+    unsigned VisitCount, const void *SymbolTag /*=nullptr*/) {
+  return acquire<SymbolConjured>(Elem, LCtx, T, VisitCount, SymbolTag);
+}
+
 void SymbolManager::addSymbolDependency(const SymbolRef Primary,
                                         const SymbolRef Dependent) {
   auto &dependencies = SymbolDependencies[Primary];
@@ -346,6 +358,7 @@ bool SymbolReaper::isLive(SymbolRef sym) {
     KnownLive = isReadableRegion(cast<SymbolRegionValue>(sym)->getRegion());
     break;
   case SymExpr::SymbolConjuredKind:
+  case SymExpr::SymbolOverlyComplexKind:
     KnownLive = false;
     break;
   case SymExpr::SymbolDerivedKind:
diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
new file mode 100644
index 0000000000000..bd076f2709422
--- /dev/null
+++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection %s -verify
+
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ConfigDumper 2>&1 | FileCheck %s --match-full-lines
+// CHECK: max-symbol-complexity = 35
+
+void clang_analyzer_dump(int v);
+
+void pumpSymbolComplexity() {
+  extern int *p;
+  *p = (*p + 1) & 1023; //  2
+  *p = (*p + 1) & 1023; //  4
+  *p = (*p + 1) & 1023; //  6
+  *p = (*p + 1) & 1023; //  8
+  *p = (*p + 1) & 1023; // 10
+  *p = (*p + 1) & 1023; // 12
+  *p = (*p + 1) & 1023; // 14
+  *p = (*p + 1) & 1023; // 16
+  *p = (*p + 1) & 1023; // 18
+  *p = (*p + 1) & 1023; // 20
+  *p = (*p + 1) & 1023; // 22
+  *p = (*p + 1) & 1023; // 24
+  *p = (*p + 1) & 1023; // 26
+  *p = (*p + 1) & 1023; // 28
+  *p = (*p + 1) & 1023; // 30
+  *p = (*p + 1) & 1023; // 32
+  *p = (*p + 1) & 1023; // 34
+
+  // The complexity of "*p" is below 35, so it's accurate.
+  clang_analyzer_dump(*p);
+  // expected-warning-re at -1 {{{{^\({34}reg}}}}
+
+  // We would increase the complexity over the threshold, thus it'll get simplified.
+  *p = (*p + 1) & 1023; // Would be 36, which is over 35.
+
+  // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify.
+  clang_analyzer_dump(*p);
+  // expected-warning-re at -1 {{{{^}}(complex_${{[0-9]+}}) & 1023}} [debug.ExprInspection]{{$}}}}
+}
+
+void hugelyOverComplicatedSymbol() {
+#define TEN_TIMES(x) x x x x x x x x x x
+#define HUNDRED_TIMES(x) TEN_TIMES(TEN_TIMES(x))
+  extern int *p;
+  HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+  HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+  HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+  HUNDRED_TIMES(*p = (*p + 1) & 1023;)
+  *p = (*p + 1) & 1023;
+  *p = (*p + 1) & 1023;
+  *p = (*p + 1) & 1023;
+  *p = (*p + 1) & 1023;
+
+  // This dump used to print a hugely complicated symbol, over 800 complexity, taking really long to simplify.
+  clang_analyzer_dump(*p);
+  // expected-warning-re at -1 {{{{^}}(((complex_${{[0-9]+}}) & 1023) + 1) & 1023 [debug.ExprInspection]{{$}}}}
+#undef HUNDRED_TIMES
+#undef TEN_TIMES
+}

>From 8c29bbc6c309efe0a1d6af948f95dbb9654971b8 Mon Sep 17 00:00:00 2001
From: Balazs Benics <balazs.benics at sonarsource.com>
Date: Fri, 27 Jun 2025 08:06:16 +0200
Subject: [PATCH 2/2] Fix regression (mostly)

---
 .../Core/PathSensitive/SymbolManager.h        | 47 +++++++++++++++----
 .../ensure-capped-symbol-complexity.cpp       | 35 ++++++++++++++
 2 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 9c27669a61ba9..6a0f4ac528a5e 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -49,6 +49,7 @@ class SymbolRegionValue : public SymbolData {
   const TypedValueRegion *R;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
       : SymbolData(ClassKind, sym), R(r) {
     assert(r);
@@ -91,6 +92,7 @@ class SymbolConjured : public SymbolData {
   const void *SymbolTag;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolConjured(SymbolID sym, ConstCFGElementRef elem,
                  const LocationContext *lctx, QualType t, unsigned count,
                  const void *symbolTag)
@@ -146,6 +148,7 @@ class SymbolOverlyComplex final : public SymbolData {
   const SymExpr *OverlyComplicatedSymbol;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
 
   SymbolOverlyComplex(SymbolID Sym, const SymExpr *OverlyComplicatedSymbol)
       : SymbolData(ClassKind, Sym),
@@ -185,6 +188,7 @@ class SymbolDerived : public SymbolData {
   const TypedValueRegion *R;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
       : SymbolData(ClassKind, sym), parentSymbol(parent), R(r) {
     assert(parent);
@@ -229,6 +233,7 @@ class SymbolExtent : public SymbolData {
   const SubRegion *R;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolExtent(SymbolID sym, const SubRegion *r)
       : SymbolData(ClassKind, sym), R(r) {
     assert(r);
@@ -274,6 +279,7 @@ class SymbolMetadata : public SymbolData {
   const void *Tag;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolMetadata(SymbolID sym, const MemRegion *r, const Stmt *s, QualType t,
                  const LocationContext *LCtx, unsigned count, const void *tag)
       : SymbolData(ClassKind, sym), R(r), S(s), T(t), LCtx(LCtx), Count(count),
@@ -340,6 +346,7 @@ class SymbolCast : public SymExpr {
   QualType ToTy;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
       : SymExpr(ClassKind, Sym, computeComplexity(In, From, To)), Operand(In),
         FromTy(From), ToTy(To) {
@@ -387,6 +394,7 @@ class UnarySymExpr : public SymExpr {
   QualType T;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
                QualType T)
       : SymExpr(ClassKind, Sym, computeComplexity(In, Op, T)), Operand(In),
@@ -484,6 +492,7 @@ class BinarySymExprImpl : public BinarySymExpr {
   RHSTYPE RHS;
 
   friend class SymExprAllocator;
+  friend class SymbolManager;
   BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
                     RHSTYPE rhs, QualType t)
       : BinarySymExpr(Sym, ClassKind, op, t,
@@ -728,18 +737,38 @@ class SymbolVisitor {
 // Returns a const pointer to T if T is a SymbolData, otherwise SymExpr.
 template <typename T, typename... Args, typename Ret>
 const Ret *SymbolManager::acquire(Args &&...args) {
-  llvm::FoldingSetNodeID profile;
-  T::Profile(profile, args...);
-  void *InsertPos;
-  SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
-  if (!SD) {
-    SD = Alloc.make<T>(std::forward<Args>(args)...);
-    DataSet.InsertNode(SD, InsertPos);
-    if (SD->complexity() > MaxCompComplexity) {
-      return cast<Ret>(acquire<SymbolOverlyComplex>(SD));
+  T Dummy(/*SymbolID=*/0, args...);
+  llvm::FoldingSetNodeID DummyProfile;
+  Dummy.Profile(DummyProfile);
+
+  if (Dummy.complexity() <= MaxCompComplexity) {
+    void *InsertPos;
+    SymExpr *SD = DataSet.FindNodeOrInsertPos(DummyProfile, InsertPos);
+    if (!SD) {
+      SD = Alloc.make<T>(args...);
+      DataSet.InsertNode(SD, InsertPos);
     }
+    return cast<Ret>(SD);
+  }
+  void *WrappedSymInsertPos;
+  SymExpr *WrappedSym =
+      DataSet.FindNodeOrInsertPos(DummyProfile, WrappedSymInsertPos);
+  if (!WrappedSym) {
+    WrappedSym = Alloc.make<T>(args...);
+    DataSet.InsertNode(WrappedSym, WrappedSymInsertPos);
   }
 
+  SymbolOverlyComplex OverlyComplexSym(/*SymbolID=*/0, WrappedSym);
+  llvm::FoldingSetNodeID OverlyComplexSymProfile;
+  OverlyComplexSym.Profile(OverlyComplexSymProfile);
+
+  void *OverlyComplexSymInsertPos;
+  SymExpr *SD = DataSet.FindNodeOrInsertPos(OverlyComplexSymProfile,
+                                            OverlyComplexSymInsertPos);
+  if (!SD) {
+    SD = Alloc.make<SymbolOverlyComplex>(WrappedSym);
+    DataSet.InsertNode(SD, OverlyComplexSymInsertPos);
+  }
   return cast<Ret>(SD);
 }
 
diff --git a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
index bd076f2709422..2e716900c3847 100644
--- a/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
+++ b/clang/test/Analysis/ensure-capped-symbol-complexity.cpp
@@ -56,3 +56,38 @@ void hugelyOverComplicatedSymbol() {
 #undef HUNDRED_TIMES
 #undef TEN_TIMES
 }
+
+typedef unsigned long long __attribute__((aligned((8)))) u64a;
+u64a compress64(u64a x, u64a m) {
+  if ((x & m) == 0)
+      return 0;
+  x &= m;
+  u64a mk = ~m << 1;
+  for (unsigned i = 0; i < 6; i++) {
+    u64a mp = mk ^ (mk << 1);
+    mp ^= mp << 2;
+    mp ^= mp << 4;
+    mp ^= mp << 8;
+    mp ^= mp << 16;
+    mp ^= mp << 32;
+    u64a mv = mp & m;
+    m = (m ^ mv) | (mv >> (1 << i));
+    u64a t = x & mv;
+    x = (x ^ t) | (t >> (1 << i));
+    mk = mk & ~mp;
+  }
+  return x;
+}
+void storecompressed512_64bit(u64a *m, u64a *x) {
+  u64a v[8] = {
+    compress64(x[0], m[0]),
+    compress64(x[1], m[1]),
+    compress64(x[2], m[2]),
+    compress64(x[3], m[3]),
+    compress64(x[4], m[4]),
+    compress64(x[5], m[5]),
+    compress64(x[6], m[6]),
+    compress64(x[7], m[7]),
+  };
+  (void)v;
+}



More information about the cfe-commits mailing list