[clang] [clang][analyzer] Stabilize path-constraint order by using alloc IDs (PR #121347)
Arseniy Zaostrovnykh via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 30 08:09:46 PST 2024
https://github.com/necto created https://github.com/llvm/llvm-project/pull/121347
These IDs are essentially allocator offsets for the SymExpr pool. They are superior to raw pointer values because they are more controllable and are not randomized across executions.
They are not stable across runs because some spurious allocations might happen only in certain runs. For example, an unrelated ImmutableMap rotates its AVL tree based on the key values that are pointers. The rotation causes extra allocations in the shared allocator, and the next SymExpr allocation happens at a different offset.
Yet, these IDs *order* is stable across runs because SymExprs are allocated in the same order and allocator offset grows monotonously.
Stability of the constraint order is important for the stability of the analyzer results. I evaluated this change on a set of 200+ open-source C++ projects with the total number of ~78 000 symbolic-execution issues.
This patch reduced the run-to-run churn (flakyness) in SE issues from 80-90 to 30-40 (out of 78K) in our CSA deployment (in our setting flaky issues are mostly due to Z3 refutation instability).
Importantly, this change is necessary for the next step in stabilizing analysis results by caching Z3 query outcomes between analysis runs (work in progress).
Across our admittedly noisy CI runs, I detected no noticeable effect on memory footprint or analysis time.
CPP-5919
>From 2e7b1403095fef710e356542f53a79e4185bdcb5 Mon Sep 17 00:00:00 2001
From: Arseniy Zaostrovnykh <necto.ne at gmail.com>
Date: Fri, 27 Dec 2024 15:22:35 +0100
Subject: [PATCH] [clang][analyzer] Stabilize path-constraint order by using
alloc IDs
These IDs are essentially allocator offsets for the SymExpr pool. They
are superior to raw pointer values because they are more controllable
and are not randomized across executions.
They are not stable across runs because some spurious allocations might happen
only in certain runs. For example, an unrelated ImmutableMap rotates its
AVL tree based on the key values that are pointers. The rotation causes
extra allocations in the shared allocator, and the next SymExpr
allocation happens at a different offset.
Yet, these IDs *order* is stable across runs because SymExprs are
allocated in the same order and allocator offset grows monotonously.
Stability of the constraint order is important for the stability of the
analyzer results. I evaluated this change on a set of 200+ open-source
C++ projects with the total number of ~78 000 symbolic-execution issues.
This patch reduced the run-to-run churn (flakyness) in SE issues from
80-90 to 30-40 (out of 78K) in our CSA deployment (in our setting flaky
issues are mostly due to Z3 refutation instability).
Importantly, this change is necessary for the next step in stabilizing
analysis results by caching Z3 query outcomes between analysis
runs (work in progress).
Across our admittedly noisy CI runs, I detected no noticeable effect on
memory footprint or analysis time.
CPP-5919
---
.../PathSensitive/RangedConstraintManager.h | 17 +++++-
.../Core/PathSensitive/SymExpr.h | 9 ++-
.../Core/PathSensitive/SymbolManager.h | 59 +++++++++++--------
.../lib/StaticAnalyzer/Core/SymbolManager.cpp | 28 +++++----
4 files changed, 74 insertions(+), 39 deletions(-)
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
index 49ea006e27aa54..27ee809a99b2cd 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -401,7 +401,22 @@ class RangeSet {
friend class Factory;
};
-using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
+struct ConstraintKVInfo : llvm::ImutKeyValueInfo<SymbolRef, RangeSet> {
+ static inline bool isEqual(key_type_ref L, key_type_ref R) {
+ return L->getAllocID() == R->getAllocID();
+ }
+
+ static inline bool isLess(key_type_ref L, key_type_ref R) {
+ return L->getAllocID() < R->getAllocID();
+ }
+
+ static inline void Profile(llvm::FoldingSetNodeID &ID, value_type_ref V) {
+ ID.AddInteger(V.first->getAllocID());
+ ID.Add(V.second);
+ }
+};
+
+using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet, ConstraintKVInfo>;
ConstraintMap getConstraintMap(ProgramStateRef State);
class RangedConstraintManager : public SimpleConstraintManager {
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
index 862a30c0e73633..91c62886f68261 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
@@ -31,6 +31,7 @@ class SymExpr : public llvm::FoldingSetNode {
virtual void anchor();
public:
+ using AllocIDType = int64_t;
enum Kind {
#define SYMBOL(Id, Parent) Id##Kind,
#define SYMBOL_RANGE(Id, First, Last) BEGIN_##Id = First, END_##Id = Last,
@@ -39,9 +40,10 @@ class SymExpr : public llvm::FoldingSetNode {
private:
Kind K;
+ AllocIDType AllocID;
protected:
- SymExpr(Kind k) : K(k) {}
+ SymExpr(Kind K, AllocIDType AllocID) : K(K), AllocID(AllocID) {}
static bool isValidTypeForSymbol(QualType T) {
// FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -56,6 +58,8 @@ class SymExpr : public llvm::FoldingSetNode {
Kind getKind() const { return K; }
+ AllocIDType getAllocID() const { return AllocID; }
+
virtual void dump() const;
virtual void dumpToStream(raw_ostream &os) const {}
@@ -122,7 +126,8 @@ class SymbolData : public SymExpr {
void anchor() override;
protected:
- SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
+ SymbolData(Kind k, SymbolID sym, AllocIDType AllocID)
+ : SymExpr(k, AllocID), Sym(sym) {
assert(classof(this));
}
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
index 73732d532f630f..da13f25323d8a2 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
@@ -44,8 +44,9 @@ class SymbolRegionValue : public SymbolData {
const TypedValueRegion *R;
public:
- SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
- : SymbolData(SymbolRegionValueKind, sym), R(r) {
+ SymbolRegionValue(SymbolID sym, const TypedValueRegion *r,
+ AllocIDType AllocID)
+ : SymbolData(SymbolRegionValueKind, sym, AllocID), R(r) {
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
}
@@ -86,8 +87,9 @@ class SymbolConjured : public SymbolData {
public:
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
- QualType t, unsigned count, const void *symbolTag)
- : SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
+ QualType t, unsigned count, const void *symbolTag,
+ AllocIDType AllocID)
+ : SymbolData(SymbolConjuredKind, sym, AllocID), S(s), T(t), Count(count),
LCtx(lctx), SymbolTag(symbolTag) {
// FIXME: 's' might be a nullptr if we're conducting invalidation
// that was caused by a destructor call on a temporary object,
@@ -138,8 +140,10 @@ class SymbolDerived : public SymbolData {
const TypedValueRegion *R;
public:
- SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
- : SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
+ SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r,
+ AllocIDType AllocID)
+ : SymbolData(SymbolDerivedKind, sym, AllocID), parentSymbol(parent),
+ R(r) {
assert(parent);
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
@@ -181,8 +185,8 @@ class SymbolExtent : public SymbolData {
const SubRegion *R;
public:
- SymbolExtent(SymbolID sym, const SubRegion *r)
- : SymbolData(SymbolExtentKind, sym), R(r) {
+ SymbolExtent(SymbolID sym, const SubRegion *r, AllocIDType AllocID)
+ : SymbolData(SymbolExtentKind, sym, AllocID), R(r) {
assert(r);
}
@@ -223,16 +227,17 @@ class SymbolMetadata : public SymbolData {
const void *Tag;
public:
- SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
- const LocationContext *LCtx, unsigned count, const void *tag)
- : SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
- Count(count), Tag(tag) {
- assert(r);
- assert(s);
- assert(isValidTypeForSymbol(t));
- assert(LCtx);
- assert(tag);
- }
+ SymbolMetadata(SymbolID sym, const MemRegion *r, const Stmt *s, QualType t,
+ const LocationContext *LCtx, unsigned count, const void *tag,
+ AllocIDType AllocID)
+ : SymbolData(SymbolMetadataKind, sym, AllocID), R(r), S(s), T(t),
+ LCtx(LCtx), Count(count), Tag(tag) {
+ assert(r);
+ assert(s);
+ assert(isValidTypeForSymbol(t));
+ assert(LCtx);
+ assert(tag);
+ }
LLVM_ATTRIBUTE_RETURNS_NONNULL
const MemRegion *getRegion() const { return R; }
@@ -287,8 +292,8 @@ class SymbolCast : public SymExpr {
QualType ToTy;
public:
- SymbolCast(const SymExpr *In, QualType From, QualType To)
- : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
+ SymbolCast(const SymExpr *In, QualType From, QualType To, AllocIDType AllocID)
+ : SymExpr(SymbolCastKind, AllocID), Operand(In), FromTy(From), ToTy(To) {
assert(In);
assert(isValidTypeForSymbol(From));
// FIXME: GenericTaintChecker creates symbols of void type.
@@ -333,8 +338,9 @@ class UnarySymExpr : public SymExpr {
QualType T;
public:
- UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
- : SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
+ UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T,
+ AllocIDType AllocID)
+ : SymExpr(UnarySymExprKind, AllocID), Operand(In), Op(Op), T(T) {
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
// modeled as x + 1.
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -381,8 +387,9 @@ class BinarySymExpr : public SymExpr {
QualType T;
protected:
- BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
- : SymExpr(k), Op(op), T(t) {
+ BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t,
+ AllocIDType AllocID)
+ : SymExpr(k, AllocID), Op(op), T(t) {
assert(classof(this));
// Binary expressions are results of arithmetic. Pointer arithmetic is not
// handled by binary expressions, but it is instead handled by applying
@@ -427,8 +434,8 @@ class BinarySymExprImpl : public BinarySymExpr {
public:
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
- QualType t)
- : BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
+ QualType t, AllocIDType AllocID)
+ : BinarySymExpr(ClassKind, op, t, AllocID), LHS(lhs), RHS(rhs) {
assert(getPointer(lhs));
assert(getPointer(rhs));
}
diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
index f21e5c3ad7bd7c..35682bb6645729 100644
--- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp
@@ -170,7 +170,8 @@ SymbolManager::getRegionValueSymbol(const TypedValueRegion* R) {
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
- SD = new (BPAlloc) SymbolRegionValue(SymbolCounter, R);
+ SD = new (BPAlloc)
+ SymbolRegionValue(SymbolCounter, R, BPAlloc.getBytesAllocated());
DataSet.InsertNode(SD, InsertPos);
++SymbolCounter;
}
@@ -188,7 +189,8 @@ const SymbolConjured* SymbolManager::conjureSymbol(const Stmt *E,
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
- SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count, SymbolTag);
+ SD = new (BPAlloc) SymbolConjured(SymbolCounter, E, LCtx, T, Count,
+ SymbolTag, BPAlloc.getBytesAllocated());
DataSet.InsertNode(SD, InsertPos);
++SymbolCounter;
}
@@ -204,7 +206,8 @@ SymbolManager::getDerivedSymbol(SymbolRef parentSymbol,
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
- SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R);
+ SD = new (BPAlloc) SymbolDerived(SymbolCounter, parentSymbol, R,
+ BPAlloc.getBytesAllocated());
DataSet.InsertNode(SD, InsertPos);
++SymbolCounter;
}
@@ -219,7 +222,8 @@ SymbolManager::getExtentSymbol(const SubRegion *R) {
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
- SD = new (BPAlloc) SymbolExtent(SymbolCounter, R);
+ SD = new (BPAlloc)
+ SymbolExtent(SymbolCounter, R, BPAlloc.getBytesAllocated());
DataSet.InsertNode(SD, InsertPos);
++SymbolCounter;
}
@@ -236,7 +240,8 @@ SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt *S, QualType T,
void *InsertPos;
SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
if (!SD) {
- SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count, SymbolTag);
+ SD = new (BPAlloc) SymbolMetadata(SymbolCounter, R, S, T, LCtx, Count,
+ SymbolTag, BPAlloc.getBytesAllocated());
DataSet.InsertNode(SD, InsertPos);
++SymbolCounter;
}
@@ -252,7 +257,7 @@ SymbolManager::getCastSymbol(const SymExpr *Op,
void *InsertPos;
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
if (!data) {
- data = new (BPAlloc) SymbolCast(Op, From, To);
+ data = new (BPAlloc) SymbolCast(Op, From, To, BPAlloc.getBytesAllocated());
DataSet.InsertNode(data, InsertPos);
}
@@ -268,7 +273,7 @@ const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
if (!data) {
- data = new (BPAlloc) SymIntExpr(lhs, op, v, t);
+ data = new (BPAlloc) SymIntExpr(lhs, op, v, t, BPAlloc.getBytesAllocated());
DataSet.InsertNode(data, InsertPos);
}
@@ -284,7 +289,8 @@ const IntSymExpr *SymbolManager::getIntSymExpr(APSIntPtr lhs,
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
if (!data) {
- data = new (BPAlloc) IntSymExpr(lhs, op, rhs, t);
+ data =
+ new (BPAlloc) IntSymExpr(lhs, op, rhs, t, BPAlloc.getBytesAllocated());
DataSet.InsertNode(data, InsertPos);
}
@@ -301,7 +307,8 @@ const SymSymExpr *SymbolManager::getSymSymExpr(const SymExpr *lhs,
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
if (!data) {
- data = new (BPAlloc) SymSymExpr(lhs, op, rhs, t);
+ data =
+ new (BPAlloc) SymSymExpr(lhs, op, rhs, t, BPAlloc.getBytesAllocated());
DataSet.InsertNode(data, InsertPos);
}
@@ -316,7 +323,8 @@ const UnarySymExpr *SymbolManager::getUnarySymExpr(const SymExpr *Operand,
void *InsertPos;
SymExpr *data = DataSet.FindNodeOrInsertPos(ID, InsertPos);
if (!data) {
- data = new (BPAlloc) UnarySymExpr(Operand, Opc, T);
+ data = new (BPAlloc)
+ UnarySymExpr(Operand, Opc, T, BPAlloc.getBytesAllocated());
DataSet.InsertNode(data, InsertPos);
}
More information about the cfe-commits
mailing list