[clang] Reland [clang][dataflow] Transfer more cast expressions. (PR #157535)
Samira Bakon via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 8 12:10:08 PDT 2025
https://github.com/bazuzi created https://github.com/llvm/llvm-project/pull/157535
Reverts llvm/llvm-project#157148
Adds fixes to `TransferVisitor::VisitCXXConstructExpr` and `copyRecord` to avoid crashing on base class initialization from sibling-derived class instances. I believe this is the only use of copyRecord where we need this special handling for a shared base class.
>From 4779770aab880315d359005a5cacd4c4976d8649 Mon Sep 17 00:00:00 2001
From: Samira Bakon <bazuzi at google.com>
Date: Mon, 8 Sep 2025 15:08:52 -0400
Subject: [PATCH] Revert "Revert "[clang][dataflow] Transfer more cast
expressions." (#157148)"
This reverts commit 4c29a600fa34d0c1cabf4ffcf081f2a00b09fddd.
---
.../Analysis/FlowSensitive/StorageLocation.h | 11 +
clang/lib/Analysis/FlowSensitive/Transfer.cpp | 71 ++++-
.../Analysis/FlowSensitive/TransferTest.cpp | 280 +++++++++++++++++-
3 files changed, 349 insertions(+), 13 deletions(-)
diff --git a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
index 8fcc6a44027a0..534b9a017d8f0 100644
--- a/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
+++ b/clang/include/clang/Analysis/FlowSensitive/StorageLocation.h
@@ -17,6 +17,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/Type.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include <cassert>
@@ -152,6 +153,11 @@ class RecordStorageLocation final : public StorageLocation {
return {SyntheticFields.begin(), SyntheticFields.end()};
}
+ /// Add a synthetic field, if none by that name is already present.
+ void addSyntheticField(llvm::StringRef Name, StorageLocation &Loc) {
+ SyntheticFields.insert({Name, &Loc});
+ }
+
/// Changes the child storage location for a field `D` of reference type.
/// All other fields cannot change their storage location and always retain
/// the storage location passed to the `RecordStorageLocation` constructor.
@@ -164,6 +170,11 @@ class RecordStorageLocation final : public StorageLocation {
Children[&D] = Loc;
}
+ /// Add a child storage location for a field `D`, if not already present.
+ void addChild(const ValueDecl &D, StorageLocation *Loc) {
+ Children.insert({&D, Loc});
+ }
+
llvm::iterator_range<FieldToLoc::const_iterator> children() const {
return {Children.begin(), Children.end()};
}
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 86a816e2e406c..23a6de45e18b1 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -20,14 +20,17 @@
#include "clang/AST/OperationKinds.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/Type.h"
#include "clang/Analysis/FlowSensitive/ASTOps.h"
#include "clang/Analysis/FlowSensitive/AdornedCFG.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
#include "clang/Analysis/FlowSensitive/RecordOps.h"
+#include "clang/Analysis/FlowSensitive/StorageLocation.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/LLVM.h"
#include "clang/Basic/OperatorKinds.h"
#include "llvm/Support/Casting.h"
#include <assert.h>
@@ -287,7 +290,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
}
}
- void VisitImplicitCastExpr(const ImplicitCastExpr *S) {
+ void VisitCastExpr(const CastExpr *S) {
const Expr *SubExpr = S->getSubExpr();
assert(SubExpr != nullptr);
@@ -317,6 +320,60 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
break;
}
+ case CK_BaseToDerived: {
+ // This is a cast of (single-layer) pointer or reference to a record type.
+ // We should now model the fields for the derived type.
+
+ // Get the RecordStorageLocation for the record object underneath.
+ RecordStorageLocation *Loc = nullptr;
+ if (S->getType()->isPointerType()) {
+ auto *PV = Env.get<PointerValue>(*SubExpr);
+ assert(PV != nullptr);
+ if (PV == nullptr)
+ break;
+ Loc = cast<RecordStorageLocation>(&PV->getPointeeLoc());
+ } else {
+ assert(S->getType()->isRecordType());
+ if (SubExpr->isGLValue()) {
+ Loc = Env.get<RecordStorageLocation>(*SubExpr);
+ } else {
+ Loc = &Env.getResultObjectLocation(*SubExpr);
+ }
+ }
+ if (!Loc) {
+ // Nowhere to add children or propagate from, so we're done.
+ break;
+ }
+
+ // Get the derived record type underneath the reference or pointer.
+ QualType Derived = S->getType().getNonReferenceType();
+ if (Derived->isPointerType()) {
+ Derived = Derived->getPointeeType();
+ }
+
+ // Add children to the storage location for fields (including synthetic
+ // fields) of the derived type and initialize their values.
+ for (const FieldDecl *Field :
+ Env.getDataflowAnalysisContext().getModeledFields(Derived)) {
+ assert(Field != nullptr);
+ QualType FieldType = Field->getType();
+ if (FieldType->isReferenceType()) {
+ Loc->addChild(*Field, nullptr);
+ } else {
+ Loc->addChild(*Field, &Env.createStorageLocation(FieldType));
+ }
+
+ for (const auto &Entry :
+ Env.getDataflowAnalysisContext().getSyntheticFields(Derived)) {
+ Loc->addSyntheticField(Entry.getKey(),
+ Env.createStorageLocation(Entry.getValue()));
+ }
+ }
+ Env.initializeFieldsWithValues(*Loc, Derived);
+
+ // Fall through to propagate SubExpr's StorageLocation to the CastExpr.
+ [[fallthrough]];
+ }
case CK_IntegralCast:
// FIXME: This cast creates a new integral value from the
// subexpression. But, because we don't model integers, we don't
@@ -324,10 +381,9 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
// modeling is added, then update this code to create a fresh location and
// value.
case CK_UncheckedDerivedToBase:
+ case CK_DerivedToBase:
case CK_ConstructorConversion:
case CK_UserDefinedConversion:
- // FIXME: Add tests that excercise CK_UncheckedDerivedToBase,
- // CK_ConstructorConversion, and CK_UserDefinedConversion.
case CK_NoOp: {
// FIXME: Consider making `Environment::getStorageLocation` skip noop
// expressions (this and other similar expressions in the file) instead
@@ -684,15 +740,6 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
propagateValue(*SubExpr, *S, Env);
}
- void VisitCXXStaticCastExpr(const CXXStaticCastExpr *S) {
- if (S->getCastKind() == CK_NoOp) {
- const Expr *SubExpr = S->getSubExpr();
- assert(SubExpr != nullptr);
-
- propagateValueOrStorageLocation(*SubExpr, *S, Env);
- }
- }
-
void VisitConditionalOperator(const ConditionalOperator *S) {
const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr());
const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr());
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 214aaee9f97f6..96e759e73c154 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -9,17 +9,25 @@
#include "TestingSupport.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/OperationKinds.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/NoopLattice.h"
#include "clang/Analysis/FlowSensitive/RecordOps.h"
#include "clang/Analysis/FlowSensitive/StorageLocation.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Testing/TestAST.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Testing/Support/Error.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -27,6 +35,7 @@
#include <string>
#include <string_view>
#include <utility>
+#include <vector>
namespace clang {
namespace dataflow {
@@ -3541,7 +3550,7 @@ TEST(TransferTest, ResultObjectLocationDontVisitUnevaluatedContexts) {
testFunction(Code, "noexceptTarget");
}
-TEST(TransferTest, StaticCast) {
+TEST(TransferTest, StaticCastNoOp) {
std::string Code = R"(
void target(int Foo) {
int Bar = static_cast<int>(Foo);
@@ -3561,6 +3570,13 @@ TEST(TransferTest, StaticCast) {
const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
ASSERT_THAT(BarDecl, NotNull());
+ const auto *Cast = ast_matchers::selectFirst<CXXStaticCastExpr>(
+ "cast",
+ ast_matchers::match(ast_matchers::cxxStaticCastExpr().bind("cast"),
+ ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_NoOp);
+
const auto *FooVal = Env.getValue(*FooDecl);
const auto *BarVal = Env.getValue(*BarDecl);
EXPECT_TRUE(isa<IntegerValue>(FooVal));
@@ -3569,6 +3585,268 @@ TEST(TransferTest, StaticCast) {
});
}
+TEST(TransferTest, StaticCastBaseToDerived) {
+ std::string Code = R"cc(
+ struct Base {
+ char C;
+ };
+ struct Intermediate : public Base {
+ bool B;
+ };
+ struct Derived : public Intermediate {
+ int I;
+ };
+ Base& getBaseRef();
+ void target(Base* BPtr) {
+ Derived* DPtr = static_cast<Derived*>(BPtr);
+ DPtr->C;
+ DPtr->B;
+ DPtr->I;
+ Derived& DRef = static_cast<Derived&>(*BPtr);
+ DRef.C;
+ DRef.B;
+ DRef.I;
+ Derived& DRefFromFunc = static_cast<Derived&>(getBaseRef());
+ DRefFromFunc.C;
+ DRefFromFunc.B;
+ DRefFromFunc.I;
+ // [[p]]
+ }
+ )cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ const ValueDecl *BPtrDecl = findValueDecl(ASTCtx, "BPtr");
+ ASSERT_THAT(BPtrDecl, NotNull());
+
+ const ValueDecl *DPtrDecl = findValueDecl(ASTCtx, "DPtr");
+ ASSERT_THAT(DPtrDecl, NotNull());
+
+ const ValueDecl *DRefDecl = findValueDecl(ASTCtx, "DRef");
+ ASSERT_THAT(DRefDecl, NotNull());
+
+ const ValueDecl *DRefFromFuncDecl =
+ findValueDecl(ASTCtx, "DRefFromFunc");
+ ASSERT_THAT(DRefFromFuncDecl, NotNull());
+
+ const auto *Cast = ast_matchers::selectFirst<CXXStaticCastExpr>(
+ "cast",
+ ast_matchers::match(ast_matchers::cxxStaticCastExpr().bind("cast"),
+ ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_BaseToDerived);
+
+ EXPECT_EQ(Env.getValue(*BPtrDecl), Env.getValue(*DPtrDecl));
+ EXPECT_EQ(&Env.get<PointerValue>(*BPtrDecl)->getPointeeLoc(),
+ Env.getStorageLocation(*DRefDecl));
+ // For DRefFromFunc, not crashing when analyzing the field accesses is
+ // enough.
+ });
+}
+
+TEST(TransferTest, ExplicitDerivedToBaseCast) {
+ std::string Code = R"cc(
+ struct Base {};
+ struct Derived : public Base {};
+ void target(Derived D) {
+ (Base*)&D;
+ // [[p]]
+ }
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>(
+ "cast", ast_matchers::match(
+ ast_matchers::implicitCastExpr().bind("cast"), ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_DerivedToBase);
+
+ auto *AddressOf = ast_matchers::selectFirst<UnaryOperator>(
+ "addressof",
+ ast_matchers::match(ast_matchers::unaryOperator().bind("addressof"),
+ ASTCtx));
+ ASSERT_THAT(AddressOf, NotNull());
+ ASSERT_EQ(AddressOf->getOpcode(), UO_AddrOf);
+
+ EXPECT_EQ(Env.getValue(*Cast), Env.getValue(*AddressOf));
+ });
+}
+
+TEST(TransferTest, ConstructorConversion) {
+ std::string Code = R"cc(
+ struct Base {};
+ struct Derived : public Base {};
+ void target(Derived D) {
+ Base B = (Base)D;
+ // [[p]]
+ }
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ auto *Cast = ast_matchers::selectFirst<CStyleCastExpr>(
+ "cast", ast_matchers::match(
+ ast_matchers::cStyleCastExpr().bind("cast"), ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_ConstructorConversion);
+
+ auto &DLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "D");
+ auto &BLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "B");
+ EXPECT_NE(&BLoc, &DLoc);
+ });
+}
+
+TEST(TransferTest, UserDefinedConversion) {
+ std::string Code = R"cc(
+ struct To {};
+ struct From {
+ operator To();
+ };
+ void target(From F) {
+ To T = (To)F;
+ // [[p]]
+ }
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>(
+ "cast", ast_matchers::match(
+ ast_matchers::implicitCastExpr().bind("cast"), ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_UserDefinedConversion);
+
+ auto &FLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "F");
+ auto &TLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "T");
+ EXPECT_NE(&TLoc, &FLoc);
+ });
+}
+
+TEST(TransferTest, ImplicitUncheckedDerivedToBaseCast) {
+ std::string Code = R"cc(
+ struct Base {
+ void method();
+ };
+ struct Derived : public Base {};
+ void target(Derived D) {
+ D.method();
+ // [[p]]
+ }
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>(
+ "cast", ast_matchers::match(
+ ast_matchers::implicitCastExpr().bind("cast"), ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_UncheckedDerivedToBase);
+
+ auto &DLoc = getLocForDecl<StorageLocation>(ASTCtx, Env, "D");
+ EXPECT_EQ(Env.getStorageLocation(*Cast), &DLoc);
+ });
+}
+
+TEST(TransferTest, ImplicitDerivedToBaseCast) {
+ std::string Code = R"cc(
+ struct Base {};
+ struct Derived : public Base {};
+ void target() {
+ Base* B = new Derived();
+ // [[p]]
+ }
+)cc";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ auto *Cast = ast_matchers::selectFirst<ImplicitCastExpr>(
+ "cast", ast_matchers::match(
+ ast_matchers::implicitCastExpr().bind("cast"), ASTCtx));
+ ASSERT_THAT(Cast, NotNull());
+ ASSERT_EQ(Cast->getCastKind(), CK_DerivedToBase);
+
+ auto *New = ast_matchers::selectFirst<CXXNewExpr>(
+ "new", ast_matchers::match(ast_matchers::cxxNewExpr().bind("new"),
+ ASTCtx));
+ ASSERT_THAT(New, NotNull());
+
+ EXPECT_EQ(Env.getValue(*Cast), Env.getValue(*New));
+ });
+}
+
+TEST(TransferTest, ReinterpretCast) {
+ std::string Code = R"cc(
+ struct S {
+ int I;
+ };
+
+ void target(unsigned char* Bytes) {
+ S& SRef = reinterpret_cast<S&>(Bytes);
+ SRef.I;
+ S* SPtr = reinterpret_cast<S*>(Bytes);
+ SPtr->I;
+ // [[p]]
+ }
+ )cc";
+ runDataflow(Code, [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>>
+ &Results,
+ ASTContext &ASTCtx) {
+ ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+ const ValueDecl *I = findValueDecl(ASTCtx, "I");
+ ASSERT_THAT(I, NotNull());
+
+ // No particular knowledge of I's value is modeled, but for both casts,
+ // the fields of S are modeled.
+
+ {
+ auto &Loc = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "SRef");
+ std::vector<const ValueDecl *> Children;
+ for (const auto &Entry : Loc.children()) {
+ Children.push_back(Entry.getFirst());
+ }
+
+ EXPECT_THAT(Children, UnorderedElementsAre(I));
+ }
+
+ {
+ auto &Loc = cast<RecordStorageLocation>(
+ getValueForDecl<PointerValue>(ASTCtx, Env, "SPtr").getPointeeLoc());
+ std::vector<const ValueDecl *> Children;
+ for (const auto &Entry : Loc.children()) {
+ Children.push_back(Entry.getFirst());
+ }
+
+ EXPECT_THAT(Children, UnorderedElementsAre(I));
+ }
+ });
+}
+
TEST(TransferTest, IntegralCast) {
std::string Code = R"(
void target(int Foo) {
More information about the cfe-commits
mailing list