[clang] [clang][analyzer] Make per-entry-point metric rows uniquely identifiable (PR #161663)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 2 06:23:06 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Arseniy Zaostrovnykh (necto)
<details>
<summary>Changes</summary>
Also remove the gratuitous spaces after "," that break strict CSV compliance.
As the debug function name does not uniquely identify an entry point, add the main-TU name and the USR values for each entry point snapshot to reduce the likelihood of collisions between declarations across large projects.
While adding a filename to each row increases the file size substantially, the difference in size for the compressed file is acceptable.
I evaluated it on our set of 200+ open source C and C++ projects with 3M entry points, and got the following results when adding these two columns:
- Raw CSV file increased from 530MB to 1.1GB
- Compressed file (XZ) increased from 54 MB to 78 MB
--
CPP-7098
Co-authored-by: Balazs Benics <balazs.benics@<!-- -->sonarsource.com>
---
Full diff: https://github.com/llvm/llvm-project/pull/161663.diff
4 Files Affected:
- (modified) clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h (+1-1)
- (modified) clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp (+24-7)
- (modified) clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp (+11-1)
- (modified) clang/test/Analysis/analyzer-stats/entry-point-stats.cpp (+6-2)
``````````diff
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h
index 633fb7aa8f72d..448e40269ca2d 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h
@@ -25,7 +25,7 @@ class EntryPointStat {
public:
llvm::StringLiteral name() const { return Name; }
- static void lockRegistry();
+ static void lockRegistry(llvm::StringRef CPPFileName);
static void takeSnapshot(const Decl *EntryPoint);
static void dumpStatsAsCSV(llvm::raw_ostream &OS);
diff --git a/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp b/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp
index b7f9044f65308..62ae62f2f2154 100644
--- a/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp
+++ b/clang/lib/StaticAnalyzer/Core/EntryPointStats.cpp
@@ -9,7 +9,9 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/EntryPointStats.h"
#include "clang/AST/DeclBase.h"
#include "clang/Analysis/AnalysisDeclContext.h"
+#include "clang/Index/USRGeneration.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
@@ -38,6 +40,7 @@ struct Registry {
};
std::vector<Snapshot> Snapshots;
+ std::string EscapedCPPFileName;
};
} // namespace
@@ -69,7 +72,7 @@ static void checkStatName(const EntryPointStat *M) {
}
}
-void EntryPointStat::lockRegistry() {
+void EntryPointStat::lockRegistry(llvm::StringRef CPPFileName) {
auto CmpByNames = [](const EntryPointStat *L, const EntryPointStat *R) {
return L->name() < R->name();
};
@@ -78,6 +81,8 @@ void EntryPointStat::lockRegistry() {
enumerateStatVectors(
[](const auto &Stats) { llvm::for_each(Stats, checkStatName); });
StatsRegistry->IsLocked = true;
+ llvm::raw_string_ostream OS(StatsRegistry->EscapedCPPFileName);
+ llvm::printEscapedString(CPPFileName, OS);
}
[[maybe_unused]] static bool isRegistered(llvm::StringLiteral Name) {
@@ -144,15 +149,27 @@ static std::vector<llvm::StringLiteral> getStatNames() {
return Ret;
}
+static std::string getUSR(const Decl *D) {
+ llvm::SmallVector<char> Buf;
+ if (index::generateUSRForDecl(D, Buf)) {
+ assert(false && "This should never fail");
+ return AnalysisDeclContext::getFunctionName(D);
+ }
+ return llvm::toStringRef(Buf).str();
+}
+
void Registry::Snapshot::dumpAsCSV(llvm::raw_ostream &OS) const {
OS << '"';
+ llvm::printEscapedString(getUSR(EntryPoint), OS);
+ OS << "\",\"";
+ OS << StatsRegistry->EscapedCPPFileName << "\",\"";
llvm::printEscapedString(
clang::AnalysisDeclContext::getFunctionName(EntryPoint), OS);
- OS << "\", ";
+ OS << "\",";
auto PrintAsBool = [&OS](bool B) { OS << (B ? "true" : "false"); };
- llvm::interleaveComma(BoolStatValues, OS, PrintAsBool);
- OS << ((BoolStatValues.empty() || UnsignedStatValues.empty()) ? "" : ", ");
- llvm::interleaveComma(UnsignedStatValues, OS);
+ llvm::interleave(BoolStatValues, OS, PrintAsBool, ",");
+ OS << ((BoolStatValues.empty() || UnsignedStatValues.empty()) ? "" : ",");
+ llvm::interleave(UnsignedStatValues, OS, [&OS](unsigned U) { OS << U; }, ",");
}
static std::vector<bool> consumeBoolStats() {
@@ -181,8 +198,8 @@ void EntryPointStat::dumpStatsAsCSV(llvm::StringRef FileName) {
}
void EntryPointStat::dumpStatsAsCSV(llvm::raw_ostream &OS) {
- OS << "EntryPoint, ";
- llvm::interleaveComma(getStatNames(), OS);
+ OS << "USR,File,DebugName,";
+ llvm::interleave(getStatNames(), OS, [&OS](const auto &a) { OS << a; }, ",");
OS << "\n";
std::vector<std::string> Rows;
diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 53466e7a75b0f..9e7538eb34600 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -65,6 +65,15 @@ STAT_MAX(MaxCFGSize, "The maximum number of basic blocks in a function.");
namespace {
+StringRef getMainFileName(const CompilerInvocation &Invocation) {
+ if (!Invocation.getFrontendOpts().Inputs.empty()) {
+ const FrontendInputFile &Input = Invocation.getFrontendOpts().Inputs[0];
+ return Input.isFile() ? Input.getFile()
+ : Input.getBuffer().getBufferIdentifier();
+ }
+ return {};
+}
+
class AnalysisConsumer : public AnalysisASTConsumer,
public DynamicRecursiveASTVisitor {
enum {
@@ -125,7 +134,8 @@ class AnalysisConsumer : public AnalysisASTConsumer,
PP(CI.getPreprocessor()), OutDir(outdir), Opts(opts), Plugins(plugins),
Injector(std::move(injector)), CTU(CI),
MacroExpansions(CI.getLangOpts()) {
- EntryPointStat::lockRegistry();
+
+ EntryPointStat::lockRegistry(getMainFileName(CI.getInvocation()));
DigestAnalyzerOptions();
if (Opts.AnalyzerDisplayProgress || Opts.PrintStats ||
diff --git a/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp b/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp
index 1ff31d114ee99..9cbe04550a8d3 100644
--- a/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp
+++ b/clang/test/Analysis/analyzer-stats/entry-point-stats.cpp
@@ -5,7 +5,9 @@
// RUN: %csv2json "%t.csv" | FileCheck --check-prefix=CHECK %s
//
// CHECK: {
-// CHECK-NEXT: "fib(unsigned int)": {
+// CHECK-NEXT: "c:@F@fib#i#": {
+// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
+// CHECK-NEXT: "DebugName": "fib(unsigned int)",
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",
@@ -40,7 +42,9 @@
// CHECK-NEXT: "MaxValidBugClassSize": "{{[0-9]+}}",
// CHECK-NEXT: "PathRunningTime": "{{[0-9]+}}"
// CHECK-NEXT: },
-// CHECK-NEXT: "main(int, char **)": {
+// CHECK-NEXT: "c:@F@main#I#**C#": {
+// CHECK-NEXT: "File": "{{.*}}entry-point-stats.cpp",
+// CHECK-NEXT: "DebugName": "main(int, char **)",
// CHECK-NEXT: "NumBlocks": "{{[0-9]+}}",
// CHECK-NEXT: "NumBlocksUnreachable": "{{[0-9]+}}",
// CHECK-NEXT: "NumCTUSteps": "{{[0-9]+}}",
``````````
</details>
https://github.com/llvm/llvm-project/pull/161663
More information about the cfe-commits
mailing list