[clang] [clang][dataflow] Fix an issue with `Environment::getResultObjectLocation()`. (PR #75483)

via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 15 01:11:42 PST 2023


https://github.com/martinboehme updated https://github.com/llvm/llvm-project/pull/75483

>From 3262b0056708614f2a3d68750a0f118508f79ccb Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 14 Dec 2023 15:43:52 +0000
Subject: [PATCH 1/3] [clang][dataflow] Fix an issue with
 `Environment::getResultObjectLocation()`.

So far, if there was a chain of record type prvalues,
`getResultObjectLocation()` would assign a different result object location to
each one. This makes no sense, of course, as all of these prvalues end up
initializing the same result object.

This patch fixes this by propagating storage locations up through the entire
chain of prvalues.

The new implementation also has the desirable effect of making it possible to
make `getResultObjectLocation()` const, which seems appropriate given that,
logically, it is just an accessor.
---
 .../FlowSensitive/DataflowEnvironment.h       | 33 ++------
 .../FlowSensitive/DataflowEnvironment.cpp     | 77 ++++++++++++++-----
 clang/lib/Analysis/FlowSensitive/Transfer.cpp |  9 ++-
 .../Analysis/FlowSensitive/TransferTest.cpp   | 48 ++++++++++++
 4 files changed, 120 insertions(+), 47 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index d7e39ab2616fd9..5943af50b6ad8f 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -325,7 +325,8 @@ class Environment {
   ///
   /// Requirements:
   ///  `E` must be a prvalue of record type.
-  RecordStorageLocation &getResultObjectLocation(const Expr &RecordPRValue);
+  RecordStorageLocation &
+  getResultObjectLocation(const Expr &RecordPRValue) const;
 
   /// Returns the return value of the current function. This can be null if:
   /// - The function has a void return type
@@ -434,24 +435,14 @@ class Environment {
 
   /// Assigns `Val` as the value of the prvalue `E` in the environment.
   ///
-  /// If `E` is not yet associated with a storage location, associates it with
-  /// a newly created storage location. In any case, associates the storage
-  /// location of `E` with `Val`.
-  ///
-  /// Once the migration to strict handling of value categories is complete
-  /// (see https://discourse.llvm.org/t/70086), this function will be renamed to
-  /// `setValue()`. At this point, prvalue expressions will be associated
-  /// directly with `Value`s, and the legacy behavior of associating prvalue
-  /// expressions with storage locations (as described above) will be
-  /// eliminated.
-  ///
   /// Requirements:
   ///
-  ///  `E` must be a prvalue
-  ///  If `Val` is a `RecordValue`, its `RecordStorageLocation` must be the
-  ///  same as that of any `RecordValue` that has already been associated with
-  ///  `E`. This is to guarantee that the result object initialized by a prvalue
-  ///  `RecordValue` has a durable storage location.
+  ///  - `E` must be a prvalue
+  ///  - If `Val` is a `RecordValue`, its `RecordStorageLocation` must be
+  ///    `getResultObjectLocation(E)`. An exception to this is if `E` is an
+  ///    expression that originally creates a `RecordValue` (such as a
+  ///    `CXXConstructExpr` or `CallExpr`), as these establish the location of
+  ///    the result object in the first place.
   void setValue(const Expr &E, Value &Val);
 
   /// Returns the value assigned to `Loc` in the environment or null if `Loc`
@@ -608,14 +599,6 @@ class Environment {
   // The copy-constructor is for use in fork() only.
   Environment(const Environment &) = default;
 
-  /// Internal version of `setStorageLocation()` that doesn't check if the
-  /// expression is a prvalue.
-  void setStorageLocationInternal(const Expr &E, StorageLocation &Loc);
-
-  /// Internal version of `getStorageLocation()` that doesn't check if the
-  /// expression is a prvalue.
-  StorageLocation *getStorageLocationInternal(const Expr &E) const;
-
   /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise
   /// return null.
   ///
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index b98037b7364522..12192e6a32dc04 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -726,27 +726,69 @@ void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
   // so allow these as an exception.
   assert(E.isGLValue() ||
          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
-  setStorageLocationInternal(E, Loc);
+  const Expr &CanonE = ignoreCFGOmittedNodes(E);
+  assert(!ExprToLoc.contains(&CanonE));
+  ExprToLoc[&CanonE] = &Loc;
 }
 
 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
   // See comment in `setStorageLocation()`.
   assert(E.isGLValue() ||
          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
-  return getStorageLocationInternal(E);
+  auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
+  return It == ExprToLoc.end() ? nullptr : &*It->second;
+}
+
+// Returns whether a prvalue of record type is the one that originally
+// constructs the object (i.e. it doesn't propagate it from one of its
+// children).
+static bool isOriginalRecordConstructor(const Expr &RecordPRValue) {
+  if (auto *Init = dyn_cast<InitListExpr>(&RecordPRValue))
+    return !Init->isSemanticForm() || !Init->isTransparent();
+  return isa<CXXConstructExpr>(RecordPRValue) || isa<CallExpr>(RecordPRValue) ||
+         isa<LambdaExpr>(RecordPRValue) ||
+         // The framework currently does not propagate the objects created in
+         // the two branches of a `ConditionalOperator` because there is no way
+         // to reconcile their storage locations, which are different. We
+         // therefore claim that the `ConditionalOperator` is the expression
+         // that originally constructs the object.
+         // Ultimately, this will be fixed by propagating locations down from
+         // the result object, rather than up from the original constructor as
+         // we do now.
+         isa<ConditionalOperator>(RecordPRValue);
 }
 
 RecordStorageLocation &
-Environment::getResultObjectLocation(const Expr &RecordPRValue) {
+Environment::getResultObjectLocation(const Expr &RecordPRValue) const {
   assert(RecordPRValue.getType()->isRecordType());
   assert(RecordPRValue.isPRValue());
 
-  if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
-    return *cast<RecordStorageLocation>(ExistingLoc);
-  auto &Loc = cast<RecordStorageLocation>(
-      DACtx->getStableStorageLocation(RecordPRValue));
-  setStorageLocationInternal(RecordPRValue, Loc);
-  return Loc;
+  // Returns a storage location that we can use if assertions fail.
+  auto FallbackForAssertFailure =
+      [this, &RecordPRValue]() -> RecordStorageLocation & {
+    return cast<RecordStorageLocation>(
+        DACtx->getStableStorageLocation(RecordPRValue));
+  };
+
+  if (isOriginalRecordConstructor(RecordPRValue)) {
+    auto *Val = cast_or_null<RecordValue>(getValue(RecordPRValue));
+    // The builtin transfer function should have created a `RecordValue` for all
+    // original record constructors.
+    assert(Val);
+    if (!Val)
+      return FallbackForAssertFailure();
+    return Val->getLoc();
+  }
+
+  // Expression nodes that propagate a record prvalue should have exactly one
+  // child.
+  llvm::SmallVector<const Stmt *> children(RecordPRValue.child_begin(),
+                                           RecordPRValue.child_end());
+  assert(children.size() == 1);
+  if (children.empty())
+    return FallbackForAssertFailure();
+
+  return getResultObjectLocation(*cast<Expr>(children[0]));
 }
 
 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
@@ -760,6 +802,11 @@ void Environment::setValue(const StorageLocation &Loc, Value &Val) {
 }
 
 void Environment::setValue(const Expr &E, Value &Val) {
+  if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) {
+    assert(isOriginalRecordConstructor(E) ||
+           &RecordVal->getLoc() == &getResultObjectLocation(E));
+  }
+
   assert(E.isPRValue());
   ExprToVal[&E] = &Val;
 }
@@ -799,18 +846,6 @@ Value *Environment::createValue(QualType Type) {
   return Val;
 }
 
-void Environment::setStorageLocationInternal(const Expr &E,
-                                             StorageLocation &Loc) {
-  const Expr &CanonE = ignoreCFGOmittedNodes(E);
-  assert(!ExprToLoc.contains(&CanonE));
-  ExprToLoc[&CanonE] = &Loc;
-}
-
-StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
-  auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
-  return It == ExprToLoc.end() ? nullptr : &*It->second;
-}
-
 Value *Environment::createValueUnlessSelfReferential(
     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
     int &CreatedValuesCount) {
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index bbf5f12359bc70..346469660662e0 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -489,7 +489,6 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     if (S->getType()->isRecordType()) {
       auto &InitialVal = *cast<RecordValue>(Env.createValue(S->getType()));
       Env.setValue(*S, InitialVal);
-      copyRecord(InitialVal.getLoc(), Env.getResultObjectLocation(*S), Env);
     }
 
     transferInlineCall(S, ConstructorDecl);
@@ -582,6 +581,14 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       Env.setValue(*S, *ArgVal);
     } else if (const FunctionDecl *F = S->getDirectCallee()) {
       transferInlineCall(S, F);
+
+      // If this call produces a prvalue of record type, make sure that we have
+      // a `RecordValue` for it. This is required so that
+      // `Environment::getResultObjectLocation()` is able to return a location
+      // for this `CallExpr`.
+      if (S->getType()->isRecordType() && S->isPRValue())
+        if (Env.getValue(*S) == nullptr)
+          refreshRecordValue(*S, Env);
     }
   }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 8da55953a32986..7a82507f4050fc 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -2635,6 +2635,54 @@ TEST(TransferTest, BindTemporary) {
       });
 }
 
+TEST(TransferTest, ResultObjectLocation) {
+  std::string Code = R"(
+    struct A {
+      virtual ~A() = default;
+    };
+
+    void target() {
+      A();
+      (void)0; // [[p]]
+    }
+  )";
+  using ast_matchers::cxxBindTemporaryExpr;
+  using ast_matchers::cxxTemporaryObjectExpr;
+  using ast_matchers::exprWithCleanups;
+  using ast_matchers::has;
+  using ast_matchers::match;
+  using ast_matchers::traverse;
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+        // The expresssion `A()` in the code above produces the following
+        // structure, consisting of three prvalues of record type.
+        // `Env.getResultObjectLocation()` should return the same location for
+        // all of these.
+        auto MatchResult = match(
+            traverse(TK_AsIs,
+                     exprWithCleanups(
+                         has(cxxBindTemporaryExpr(
+                                 has(cxxTemporaryObjectExpr().bind("toe")))
+                                 .bind("bte")))
+                         .bind("ewc")),
+            ASTCtx);
+        auto *TOE = selectFirst<CXXTemporaryObjectExpr>("toe", MatchResult);
+        ASSERT_NE(TOE, nullptr);
+        auto *EWC = selectFirst<ExprWithCleanups>("ewc", MatchResult);
+        ASSERT_NE(EWC, nullptr);
+        auto *BTE = selectFirst<CXXBindTemporaryExpr>("bte", MatchResult);
+        ASSERT_NE(BTE, nullptr);
+
+        RecordStorageLocation &Loc = Env.getResultObjectLocation(*TOE);
+        EXPECT_EQ(&Loc, &Env.getResultObjectLocation(*EWC));
+        EXPECT_EQ(&Loc, &Env.getResultObjectLocation(*BTE));
+      });
+}
+
 TEST(TransferTest, StaticCast) {
   std::string Code = R"(
     void target(int Foo) {

>From c366e66a50ce4c266f5fd315e2865a6bf968cb62 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Fri, 15 Dec 2023 09:03:15 +0000
Subject: [PATCH 2/3] In `refreshRecordValue()`, associate the new
 `RecordValue` with the existing storage location.

Without this, #75483 started making tests for an internal analysis fail because
of the following. The framework now generates `RecordValue`s for all `CallExpr`s
that are pravlues of record type; the analysis was also calling
`refreshRecordValue()` for those `CallExpr`s, but some parts of the analysis
were still using the old `RecordValue` because the entry in `LocToVal` had not
been updated.
---
 clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 12192e6a32dc04..560d683f8a7156 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -1079,6 +1079,7 @@ RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
     if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) {
       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
       Env.setValue(Expr, NewVal);
+      Env.setValue(NewVal.getLoc(), NewVal);
       return NewVal;
     }
 

>From ae82e05a87b17bab80b5a27f8812e3282f29d36b Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Fri, 15 Dec 2023 09:11:02 +0000
Subject: [PATCH 3/3] Expand a comment

---
 clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 560d683f8a7156..93919cd0243d0c 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -754,7 +754,8 @@ static bool isOriginalRecordConstructor(const Expr &RecordPRValue) {
          // that originally constructs the object.
          // Ultimately, this will be fixed by propagating locations down from
          // the result object, rather than up from the original constructor as
-         // we do now.
+         // we do now (see also the FIXME in the documentation for
+         // `getResultObjectLocation()`).
          isa<ConditionalOperator>(RecordPRValue);
 }
 



More information about the cfe-commits mailing list