[clang] [clang][dataflow] Correctly treat empty initializer lists for unions. (PR #82986)

via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 29 05:23:03 PST 2024


https://github.com/martinboehme updated https://github.com/llvm/llvm-project/pull/82986

>From cd64b0e04026235283016eaf1cd601076ab7aeb2 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Tue, 27 Feb 2024 18:23:36 +0000
Subject: [PATCH 1/3] Reapply "[clang][dataflow] Correctly handle
 `InitListExpr` of union type." (#82856)

This reverts commit c4e94633e8a48ee33115d5d3161ee142fc1c9700.
---
 .../FlowSensitive/DataflowEnvironment.h       |  9 ++++---
 .../FlowSensitive/DataflowEnvironment.cpp     | 18 ++++++++++---
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 25 +++++++++++--------
 .../Analysis/FlowSensitive/TestingSupport.h   | 19 ++++++++++++++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 14 +++++++++--
 5 files changed, 65 insertions(+), 20 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 7f8c70d169376e..62e7af7ac219bc 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -723,9 +723,12 @@ RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
                                              const Environment &Env);
 
-/// Returns the fields of `RD` that are initialized by an `InitListExpr`, in the
-/// order in which they appear in `InitListExpr::inits()`.
-std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD);
+/// Returns the fields of a `RecordDecl` that are initialized by an
+/// `InitListExpr`, in the order in which they appear in
+/// `InitListExpr::inits()`.
+/// `Init->getType()` must be a record type.
+std::vector<const FieldDecl *>
+getFieldsForInitListExpr(const InitListExpr *InitList);
 
 /// Associates a new `RecordValue` with `Loc` and returns the new value.
 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env);
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index d487944ce92111..0cfc26ea952cda 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -361,8 +361,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
     if (const auto *FD = dyn_cast<FieldDecl>(VD))
       Fields.insert(FD);
   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
-    if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
-      for (const auto *FD : getFieldsForInitListExpr(RD))
+    if (InitList->getType()->isRecordType())
+      for (const auto *FD : getFieldsForInitListExpr(InitList))
         Fields.insert(FD);
   }
 }
@@ -1104,12 +1104,22 @@ RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
   return Env.get<RecordStorageLocation>(*Base);
 }
 
-std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
+std::vector<const FieldDecl *>
+getFieldsForInitListExpr(const InitListExpr *InitList) {
+  const RecordDecl *RD = InitList->getType()->getAsRecordDecl();
+  assert(RD != nullptr);
+
+  std::vector<const FieldDecl *> Fields;
+
+  if (InitList->getType()->isUnionType()) {
+    Fields.push_back(InitList->getInitializedFieldInUnion());
+    return Fields;
+  }
+
   // Unnamed bitfields are only used for padding and do not appear in
   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
   // field list, and we thus need to remove them before mapping inits to
   // fields to avoid mapping inits to the wrongs fields.
-  std::vector<FieldDecl *> Fields;
   llvm::copy_if(
       RD->fields(), std::back_inserter(Fields),
       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 089854264f483a..3f7e2e27d25cfd 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -663,14 +663,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
   void VisitInitListExpr(const InitListExpr *S) {
     QualType Type = S->getType();
 
-    if (Type->isUnionType()) {
-      // FIXME: Initialize unions properly.
-      if (auto *Val = Env.createValue(Type))
-        Env.setValue(*S, *Val);
-      return;
-    }
-
-    if (!Type->isStructureOrClassType()) {
+    if (!Type->isRecordType()) {
       // Until array initialization is implemented, we skip arrays and don't
       // need to care about cases where `getNumInits() > 1`.
       if (!Type->isArrayType() && S->getNumInits() == 1)
@@ -688,10 +681,9 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
 
     // This only contains the direct fields for the given type.
-    std::vector<FieldDecl *> FieldsForInit =
-        getFieldsForInitListExpr(Type->getAsRecordDecl());
+    std::vector<const FieldDecl *> FieldsForInit = getFieldsForInitListExpr(S);
 
-    // `S->inits()` contains all the initializer epressions, including the
+    // `S->inits()` contains all the initializer expressions, including the
     // ones for direct base classes.
     auto Inits = S->inits();
     size_t InitIdx = 0;
@@ -731,6 +723,17 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       FieldLocs.insert({Field, &Loc});
     }
 
+    // In the case of a union, we don't in general have initializers for all
+    // of the fields. Create storage locations for the remaining fields (but
+    // don't associate them with values).
+    if (Type->isUnionType()) {
+      for (const FieldDecl *Field :
+           Env.getDataflowAnalysisContext().getModeledFields(Type)) {
+        if (auto [it, inserted] = FieldLocs.insert({Field, nullptr}); inserted)
+          it->second = &Env.createStorageLocation(Field->getType());
+      }
+    }
+
     // Check that we satisfy the invariant that a `RecordStorageLoation`
     // contains exactly the set of modeled fields for that type.
     // `ModeledFields` includes fields from all the bases, but only the
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
index 0d36d2802897fd..b7cf6cc966edb0 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
@@ -432,6 +432,8 @@ llvm::Error checkDataflowWithNoopAnalysis(
         {});
 
 /// Returns the `ValueDecl` for the given identifier.
+/// The returned pointer is guaranteed to be non-null; the function asserts if
+/// no `ValueDecl` with the given name is found.
 ///
 /// Requirements:
 ///
@@ -475,6 +477,15 @@ ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env,
   return *cast<ValueT>(Env.getValue(*VD));
 }
 
+/// Returns the storage location for the field called `Name` of `Loc`.
+/// Optionally casts the field storage location to `T`.
+template <typename T = StorageLocation>
+std::enable_if_t<std::is_base_of_v<StorageLocation, T>, T &>
+getFieldLoc(const RecordStorageLocation &Loc, llvm::StringRef Name,
+            ASTContext &ASTCtx) {
+  return *cast<T>(Loc.getChild(*findValueDecl(ASTCtx, Name)));
+}
+
 /// Returns the value of a `Field` on the record referenced by `Loc.`
 /// Returns null if `Loc` is null.
 inline Value *getFieldValue(const RecordStorageLocation *Loc,
@@ -487,6 +498,14 @@ inline Value *getFieldValue(const RecordStorageLocation *Loc,
   return Env.getValue(*FieldLoc);
 }
 
+/// Returns the value of a `Field` on the record referenced by `Loc.`
+/// Returns null if `Loc` is null.
+inline Value *getFieldValue(const RecordStorageLocation *Loc,
+                            llvm::StringRef Name, ASTContext &ASTCtx,
+                            const Environment &Env) {
+  return getFieldValue(Loc, *findValueDecl(ASTCtx, Name), Env);
+}
+
 /// Creates and owns constraints which are boolean values.
 class ConstraintContext {
   unsigned NextAtom = 0;
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 2be899f5b6da91..f09a54544cdcb2 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -2392,14 +2392,24 @@ TEST(TransferTest, InitListExprAsUnion) {
       } F;
 
      public:
-      constexpr target() : F{nullptr} {}
+      constexpr target() : F{nullptr} {
+        int *null = nullptr;
+        F.b;  // Make sure we reference 'b' so it is modeled.
+        // [[p]]
+      }
     };
   )cc";
   runDataflow(
       Code,
       [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
          ASTContext &ASTCtx) {
-        // Just verify that it doesn't crash.
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+        auto &FLoc = getFieldLoc<RecordStorageLocation>(
+            *Env.getThisPointeeStorageLocation(), "F", ASTCtx);
+        auto *AVal = cast<PointerValue>(getFieldValue(&FLoc, "a", ASTCtx, Env));
+        ASSERT_EQ(AVal, &getValueForDecl<PointerValue>(ASTCtx, Env, "null"));
+        ASSERT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr);
       });
 }
 

>From 607c26eb4bd43d6a7eecd54a50c692ea6491e910 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Mon, 26 Feb 2024 11:41:08 +0000
Subject: [PATCH 2/3] [clang][dataflow] Correctly treat empty initializer lists
 for unions.

This fixes a crash introduced by https://github.com/llvm/llvm-project/pull/82348
but also adds additional handling to make sure that we treat empty initializer
lists for both unions and structs/classes correctly (see tests added in this
patch).
---
 .../FlowSensitive/DataflowEnvironment.cpp     |  7 +-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 15 +++-
 .../Analysis/FlowSensitive/TransferTest.cpp   | 68 ++++++++++++++++++-
 3 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 0cfc26ea952cda..fd7b06efcc7861 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -983,7 +983,7 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
   }
 
   Value *Val = nullptr;
-  if (InitExpr)
+  if (InitExpr) {
     // In the (few) cases where an expression is intentionally
     // "uninterpreted", `InitExpr` is not associated with a value.  There are
     // two ways to handle this situation: propagate the status, so that
@@ -998,6 +998,11 @@ StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
     // default value (assuming we don't update the environment API to return
     // references).
     Val = getValue(*InitExpr);
+
+    if (!Val && isa<ImplicitValueInitExpr>(InitExpr) &&
+        InitExpr->getType()->isPointerType())
+      Val = &getOrCreateNullPointerValue(InitExpr->getType()->getPointeeType());
+  }
   if (!Val)
     Val = createValue(Ty);
 
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 3f7e2e27d25cfd..04aa2831df0558 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -685,9 +685,22 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
 
     // `S->inits()` contains all the initializer expressions, including the
     // ones for direct base classes.
-    auto Inits = S->inits();
+    ArrayRef<Expr *> Inits = S->inits();
     size_t InitIdx = 0;
 
+    // Unions initialized with an empty initializer list need special treatment.
+    // For structs/classes initialized with an empty initializer list, Clang
+    // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions,
+    // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves.
+    std::optional<ImplicitValueInitExpr> ImplicitValueInitForUnion;
+    SmallVector<Expr *> InitsForUnion;
+    if (S->getType()->isUnionType() && Inits.empty()) {
+      assert(FieldsForInit.size() == 1);
+      ImplicitValueInitForUnion.emplace(FieldsForInit.front()->getType());
+      InitsForUnion.push_back(&*ImplicitValueInitForUnion);
+      Inits = InitsForUnion;
+    }
+
     // Initialize base classes.
     if (auto* R = S->getType()->getAsCXXRecordDecl()) {
       assert(FieldsForInit.size() + R->getNumBases() == Inits.size());
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index f09a54544cdcb2..f6e6343e7bf655 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -2408,8 +2408,72 @@ TEST(TransferTest, InitListExprAsUnion) {
         auto &FLoc = getFieldLoc<RecordStorageLocation>(
             *Env.getThisPointeeStorageLocation(), "F", ASTCtx);
         auto *AVal = cast<PointerValue>(getFieldValue(&FLoc, "a", ASTCtx, Env));
-        ASSERT_EQ(AVal, &getValueForDecl<PointerValue>(ASTCtx, Env, "null"));
-        ASSERT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr);
+        EXPECT_EQ(AVal, &getValueForDecl<PointerValue>(ASTCtx, Env, "null"));
+        EXPECT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr);
+      });
+}
+
+TEST(TransferTest, EmptyInitListExprForUnion) {
+  // This is a crash repro.
+  std::string Code = R"cc(
+    class target {
+      union {
+        int *a;
+        bool *b;
+      } F;
+
+     public:
+      constexpr target() : F{} {
+        int *null = nullptr;
+        F.b;  // Make sure we reference 'b' so it is modeled.
+        // [[p]]
+      }
+    };
+  )cc";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+        auto &FLoc = getFieldLoc<RecordStorageLocation>(
+            *Env.getThisPointeeStorageLocation(), "F", ASTCtx);
+        auto *AVal = cast<PointerValue>(getFieldValue(&FLoc, "a", ASTCtx, Env));
+        EXPECT_EQ(AVal, &getValueForDecl<PointerValue>(ASTCtx, Env, "null"));
+        EXPECT_EQ(getFieldValue(&FLoc, "b", ASTCtx, Env), nullptr);
+      });
+}
+
+TEST(TransferTest, EmptyInitListExprForStruct) {
+  std::string Code = R"cc(
+    class target {
+      struct {
+        int *a;
+        bool *b;
+      } F;
+
+     public:
+      constexpr target() : F{} {
+        int *NullIntPtr = nullptr;
+        bool *NullBoolPtr = nullptr;
+        // [[p]]
+      }
+    };
+  )cc";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+        auto &FLoc = getFieldLoc<RecordStorageLocation>(
+            *Env.getThisPointeeStorageLocation(), "F", ASTCtx);
+        auto *AVal = cast<PointerValue>(getFieldValue(&FLoc, "a", ASTCtx, Env));
+        ASSERT_EQ(AVal,
+                  &getValueForDecl<PointerValue>(ASTCtx, Env, "NullIntPtr"));
+        auto *BVal = cast<PointerValue>(getFieldValue(&FLoc, "b", ASTCtx, Env));
+        ASSERT_EQ(BVal,
+                  &getValueForDecl<PointerValue>(ASTCtx, Env, "NullBoolPtr"));
       });
 }
 

>From 5c3c1da2c076f17c864089c937897cd9924f51f0 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 29 Feb 2024 13:22:39 +0000
Subject: [PATCH 3/3] fixup! [clang][dataflow] Correctly treat empty
 initializer lists for unions.

---
 clang/unittests/Analysis/FlowSensitive/TransferTest.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index f6e6343e7bf655..f534ccb1254701 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -2423,6 +2423,10 @@ TEST(TransferTest, EmptyInitListExprForUnion) {
       } F;
 
      public:
+      // Empty initializer list means that `F` is aggregate-initialized.
+      // For a union, this has the effect that the first member of the union
+      // is copy-initialized from an empty initializer list; in this specific
+      // case, this has the effect of initializing `a` with null.
       constexpr target() : F{} {
         int *null = nullptr;
         F.b;  // Make sure we reference 'b' so it is modeled.
@@ -2469,10 +2473,10 @@ TEST(TransferTest, EmptyInitListExprForStruct) {
         auto &FLoc = getFieldLoc<RecordStorageLocation>(
             *Env.getThisPointeeStorageLocation(), "F", ASTCtx);
         auto *AVal = cast<PointerValue>(getFieldValue(&FLoc, "a", ASTCtx, Env));
-        ASSERT_EQ(AVal,
+        EXPECT_EQ(AVal,
                   &getValueForDecl<PointerValue>(ASTCtx, Env, "NullIntPtr"));
         auto *BVal = cast<PointerValue>(getFieldValue(&FLoc, "b", ASTCtx, Env));
-        ASSERT_EQ(BVal,
+        EXPECT_EQ(BVal,
                   &getValueForDecl<PointerValue>(ASTCtx, Env, "NullBoolPtr"));
       });
 }



More information about the cfe-commits mailing list