[clang] [clang][ssaf] Add UnsafeBufferUsage summary data structures (PR #181067)
Balázs Benics via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 12 03:57:13 PST 2026
================
@@ -0,0 +1,116 @@
+//===- UnsafeBufferUsage.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_SCALABLE_ANALYSES_UNSAFEBUFFERUSAGE_H
+#define LLVM_CLANG_ANALYSIS_SCALABLE_ANALYSES_UNSAFEBUFFERUSAGE_H
+
+#include "clang/Analysis/Scalable/Model/EntityId.h"
+#include "clang/Analysis/Scalable/TUSummary/EntitySummary.h"
+#include "clang/Analysis/Scalable/TUSummary/TUSummaryBuilder.h"
+#include "clang/Analysis/Scalable/TUSummary/TUSummaryExtractor.h"
+#include "llvm/ADT/SmallVector.h"
+#include <limits>
+#include <memory>
+#include <set>
+
+namespace clang::ssaf {
+
+/// A PointerKindVariable is associated with a pointer type as (a spelling part
+/// of) the declared type of an entity. In other words, a PointerKindVariable
+/// is associated with a `*` in the fully expanded spelling of the declared
+/// type.
+///
+/// For example, for `int **p;`, there are two PointerKindVariables. One is
+/// associated with `int **` and the other is associated with `int *`.
+///
+/// A PointerKindVariable can be identified by an EntityId, of which the
+/// declared type is a pointer type, and an unsigned integer indicating the
+/// pointer level with 1 referring to the whole declared pointer type.
+///
+/// For the same example `int **p;`, the two PointerKindVariables are:
+/// `(p, 1)` for `int **` and `(p, 2)` for `int *`.
+///
+/// Reserve pointer level value 0 for implementation-internal use.
+class PointerKindVariable {
+  // Both fields are const, so a variable cannot be modified after creation.
+  const EntityId Entity;
+  const unsigned PointerLevel;
+
+  // The constructor is private; only these classes may create instances.
+  friend class UnsafeBufferUsageTUSummaryBuilder;
+  friend class UnsafeBufferUsageEntitySummary;
+
+  PointerKindVariable(EntityId Entity, unsigned PointerLevel)
+      : Entity(Entity), PointerLevel(PointerLevel) {}
+
+public:
+  /// Returns the entity this variable is attached to.
+  EntityId getEntity() const { return Entity; }
+
+  /// Returns the pointer level stored in this variable.
+  unsigned getPointerLevel() const { return PointerLevel; }
+
+  /// Two variables are equal iff both the entity and the level match.
+  bool operator==(const PointerKindVariable &Other) const {
+    if (!(Entity == Other.Entity))
+      return false;
+    return PointerLevel == Other.PointerLevel;
+  }
+
+  bool operator!=(const PointerKindVariable &Other) const {
+    return !operator==(Other);
+  }
+
+  /// Orders by entity first and breaks ties by pointer level.
+  bool operator<(const PointerKindVariable &Other) const {
+    if (!(Entity == Other.Entity))
+      return Entity < Other.Entity;
+    return PointerLevel < Other.PointerLevel;
+  }
+};
+
+using PointerKindVariableSet = std::set<PointerKindVariable>;
+
+/// An UnsafeBufferUsageEntitySummary is an immutable set of unsafe buffers, in
+/// the form of PointerKindVariable.
+class UnsafeBufferUsageEntitySummary : public EntitySummary {
+ const PointerKindVariableSet UnsafeBuffers;
+
+ friend class UnsafeBufferUsageTUSummaryBuilder;
+
+ UnsafeBufferUsageEntitySummary(PointerKindVariableSet &&UnsafeBuffers)
+ : EntitySummary(SummaryName{"UnsafeBufferUsage"}),
+ UnsafeBuffers(std::move(UnsafeBuffers)) {}
+
+public:
+ using const_iterator = PointerKindVariableSet::const_iterator;
+
+ const_iterator begin() const { return UnsafeBuffers.begin(); }
+ const_iterator end() const { return UnsafeBuffers.end(); }
+
+ const_iterator find(const PointerKindVariable &V) const {
+ return UnsafeBuffers.find(V);
+ }
+
+ llvm::iterator_range<const_iterator> getSubsetOf(EntityId Entity) const {
+ auto Begin = UnsafeBuffers.lower_bound({Entity, 0});
+ auto End = UnsafeBuffers.upper_bound(
+ {Entity, std::numeric_limits<unsigned>::max()});
+ return {Begin, End};
+ }
----------------
steakhal wrote:
I think this could be done using equal_range in a single pass:
Make sure that `UnsafeBuffers` uses a transparent comparator, i.e. the 2nd template parameter of the set is `std::less<>`. That enables the 4th overload of `std::set<Key,Compare,Allocator>::equal_range` ([cppref](https://en.cppreference.com/w/cpp/container/set/equal_range.html)). Then use a custom projection type that compares only the `EntityId` fields.
```c++
/// Lookup key that orders by EntityId only, for use with a transparent
/// comparator such as `std::less<>`.
struct ByEntityId {
  /// Wraps the entity to search for.
  explicit ByEntityId(EntityId Desired) : Id(Desired) {}

  /// Intentionally implicit, so that a PointerKindVariable converts to the
  /// key type when it is compared against a ByEntityId.
  /*implicit*/ ByEntityId(PointerKindVariable Outer)
      : ByEntityId(Outer.getEntity()) {}

  /// Compares the wrapped entity ids.
  friend bool operator<(const ByEntityId &LHS, const ByEntityId &RHS) {
    return LHS.Id < RHS.Id;
  }

private:
  const EntityId Id;
};
```
And:
```c++
/// Returns the range of elements of `UnsafeBuffers` that compare equivalent
/// to `ByEntityId(Entity)`.
llvm::iterator_range<const_iterator> getSubsetOf(EntityId Entity) const {
  // One heterogeneous lookup yields both ends of the range.
  const auto Bounds = UnsafeBuffers.equal_range(ByEntityId(Entity));
  return llvm::make_range(Bounds.first, Bounds.second);
}
```
This should reuse the std set traversal state and make this lookup faster.
https://github.com/llvm/llvm-project/pull/181067
More information about the cfe-commits
mailing list