[clang] [clang][dataflow] Model conditional operator correctly. (PR #89213)

via cfe-commits cfe-commits at lists.llvm.org
Sun Apr 21 23:56:55 PDT 2024


https://github.com/martinboehme updated https://github.com/llvm/llvm-project/pull/89213

>From d4205b37d9ba3cecd7cd947a188ec84e9afec899 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 18 Apr 2024 10:50:40 +0000
Subject: [PATCH 1/4] [clang][dataflow] Model conditional operator correctly.

---
 .../FlowSensitive/DataflowEnvironment.h       | 15 +++++
 .../clang/Analysis/FlowSensitive/Transfer.h   |  3 +-
 .../FlowSensitive/DataflowEnvironment.cpp     | 46 ++++++-------
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 38 ++++++-----
 .../TypeErasedDataflowAnalysis.cpp            |  4 +-
 .../Analysis/FlowSensitive/TestingSupport.h   |  4 +-
 .../Analysis/FlowSensitive/TransferTest.cpp   | 66 +++++++++++++++++--
 7 files changed, 130 insertions(+), 46 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 4277792219c0af..7ad866ba3f62ff 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -244,6 +244,21 @@ class Environment {
                           Environment::ValueModel &Model,
                           ExprJoinBehavior ExprBehavior);
 
+  /// Returns a value that approximates both `Val1` and `Val2`, or null if no
+  /// such value can be produced.
+  ///
+  /// `Env1` and `Env2` can be used to query child values and path condition
+  /// implications of `Val1` and `Val2` respectively. The joined value will be
+  /// produced in `JoinedEnv`.
+  ///
+  /// Requirements:
+  ///
+  ///  `Val1` and `Val2` must model values of type `Type`.
+  static Value *joinValues(QualType Ty, Value *Val1, const Environment &Env1,
+                           Value *Val2, const Environment &Env2,
+                           Environment &JoinedEnv,
+                           Environment::ValueModel &Model);
+
   /// Widens the environment point-wise, using `PrevEnv` as needed to inform the
   /// approximation.
   ///
diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
index ed148250d8eb29..940025e02100f9 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h
@@ -53,7 +53,8 @@ class StmtToEnvMap {
 /// Requirements:
 ///
 ///  `S` must not be `ParenExpr` or `ExprWithCleanups`.
-void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env);
+void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env,
+              Environment::ValueModel &Model);
 
 } // namespace dataflow
 } // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 3f1600d9ac5d87..18fd6476dedf4b 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -255,13 +255,8 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
       continue;
     assert(It->second != nullptr);
 
-    if (areEquivalentValues(*Val, *It->second)) {
-      Result.insert({Loc, Val});
-      continue;
-    }
-
-    if (Value *JoinedVal = joinDistinctValues(
-            Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
+    if (Value *JoinedVal = Environment::joinValues(
+            Loc->getType(), Val, Env1, It->second, Env2, JoinedEnv, Model)) {
       Result.insert({Loc, JoinedVal});
     }
   }
@@ -788,27 +783,16 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
 
-  if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
-    // `ReturnVal` might not always get set -- for example if we have a return
-    // statement of the form `return some_other_func()` and we decide not to
-    // analyze `some_other_func()`.
-    // In this case, we can't say anything about the joined return value -- we
-    // don't simply want to propagate the return value that we do have, because
-    // it might not be the correct one.
-    // This occurs for example in the test `ContextSensitiveMutualRecursion`.
+  if (EnvA.CallStack.empty()) {
     JoinedEnv.ReturnVal = nullptr;
-  } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
-    JoinedEnv.ReturnVal = EnvA.ReturnVal;
   } else {
-    assert(!EnvA.CallStack.empty());
     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
     // cast.
     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
     assert(Func != nullptr);
-    if (Value *JoinedVal =
-            joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
-                               *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
-      JoinedEnv.ReturnVal = JoinedVal;
+    JoinedEnv.ReturnVal =
+        joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal,
+                   EnvB, JoinedEnv, Model);
   }
 
   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
@@ -834,6 +818,24 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   return JoinedEnv;
 }
 
+Value *Environment::joinValues(QualType Ty, Value *Val1,
+                               const Environment &Env1, Value *Val2,
+                               const Environment &Env2, Environment &JoinedEnv,
+                               Environment::ValueModel &Model) {
+  if (Val1 == nullptr || Val2 == nullptr)
+    // We can't say anything about the joined value -- even if one of the values
+    // is non-null, we don't want to simply propagate it, because it would be
+    // too specific: Because the other value is null, that means we have no
+    // information at all about the value (i.e. the value is unconstrained).
+    return nullptr;
+
+  if (areEquivalentValues(*Val1, *Val2))
+    // Arbitrarily return one of the two values.
+    return Val1;
+
+  return joinDistinctValues(Ty, *Val1, Env1, *Val2, Env2, JoinedEnv, Model);
+}
+
 StorageLocation &Environment::createStorageLocation(QualType Type) {
   return DACtx->createStorageLocation(Type);
 }
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 1e034771014eaa..37862e8695cccc 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -122,8 +122,9 @@ namespace {
 
 class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
 public:
-  TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env)
-      : StmtToEnv(StmtToEnv), Env(Env) {}
+  TransferVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
+                  Environment::ValueModel &Model)
+      : StmtToEnv(StmtToEnv), Env(Env), Model(Model) {}
 
   void VisitBinaryOperator(const BinaryOperator *S) {
     const Expr *LHS = S->getLHS();
@@ -657,17 +658,22 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
   }
 
   void VisitConditionalOperator(const ConditionalOperator *S) {
-    // FIXME: Revisit this once flow conditions are added to the framework. For
-    // `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow
-    // condition.
-    // When we do this, we will need to retrieve the values of the operands from
-    // the environments for the basic blocks they are computed in, in a similar
-    // way to how this is done for short-circuited logical operators in
-    // `getLogicOperatorSubExprValue()`.
-    if (S->isGLValue())
-      Env.setStorageLocation(*S, Env.createObject(S->getType()));
-    else if (!S->getType()->isRecordType()) {
-      if (Value *Val = Env.createValue(S->getType()))
+    const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr());
+    const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr());
+
+    if (TrueEnv == nullptr || FalseEnv == nullptr)
+      return;
+
+    if (S->isGLValue()) {
+      StorageLocation *TrueLoc = TrueEnv->getStorageLocation(*S->getTrueExpr());
+      StorageLocation *FalseLoc =
+          FalseEnv->getStorageLocation(*S->getFalseExpr());
+      if (TrueLoc == FalseLoc && TrueLoc != nullptr)
+        Env.setStorageLocation(*S, *TrueLoc);
+    } else if (!S->getType()->isRecordType()) {
+      if (Value *Val = Environment::joinValues(
+              S->getType(), TrueEnv->getValue(*S->getTrueExpr()), *TrueEnv,
+              FalseEnv->getValue(*S->getFalseExpr()), *FalseEnv, Env, Model))
         Env.setValue(*S, *Val);
     }
   }
@@ -830,12 +836,14 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
 
   const StmtToEnvMap &StmtToEnv;
   Environment &Env;
+  Environment::ValueModel &Model;
 };
 
 } // namespace
 
-void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env) {
-  TransferVisitor(StmtToEnv, Env).Visit(&S);
+void transfer(const StmtToEnvMap &StmtToEnv, const Stmt &S, Environment &Env,
+              Environment::ValueModel &Model) {
+  TransferVisitor(StmtToEnv, Env, Model).Visit(&S);
 }
 
 } // namespace dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 1b73c5d6830161..5a21a85b90ee9b 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -316,7 +316,7 @@ builtinTransferStatement(unsigned CurBlockID, const CFGStmt &Elt,
   const Stmt *S = Elt.getStmt();
   assert(S != nullptr);
   transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, CurBlockID, InputState), *S,
-           InputState.Env);
+           InputState.Env, AC.Analysis);
 }
 
 /// Built-in transfer function for `CFGInitializer`.
@@ -452,7 +452,7 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
       // terminator condition, but not as a `CFGElement`. The condition of an if
       // statement is one such example.
       transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, Block.getBlockID(), State),
-               *TerminatorCond, State.Env);
+               *TerminatorCond, State.Env, AC.Analysis);
 
     // If the transfer function didn't produce a value, create an atom so that
     // we have *some* value for the condition expression. This ensures that
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
index e3c7ff685f5724..3b0e05ed72220e 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
@@ -456,7 +456,7 @@ const IndirectFieldDecl *findIndirectFieldDecl(ASTContext &ASTCtx,
 /// Requirements:
 ///
 ///   `Name` must be unique in `ASTCtx`.
-template <class LocT>
+template <class LocT = StorageLocation>
 LocT &getLocForDecl(ASTContext &ASTCtx, const Environment &Env,
                     llvm::StringRef Name) {
   const ValueDecl *VD = findValueDecl(ASTCtx, Name);
@@ -470,7 +470,7 @@ LocT &getLocForDecl(ASTContext &ASTCtx, const Environment &Env,
 /// Requirements:
 ///
 ///   `Name` must be unique in `ASTCtx`.
-template <class ValueT>
+template <class ValueT = Value>
 ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env,
                         llvm::StringRef Name) {
   const ValueDecl *VD = findValueDecl(ASTCtx, Name);
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 97ec32126c1dc4..cff8d1e6324640 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -5243,6 +5243,67 @@ TEST(TransferTest, BinaryOperatorComma) {
       });
 }
 
+TEST(TransferTest, ConditionalOperatorValue) {
+  std::string Code = R"(
+    void target(bool Cond, bool B1, bool B2) {
+      bool JoinSame = Cond ? B1 : B1;
+      bool JoinDifferent = Cond ? B1 : B2;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        Environment Env = getEnvironmentAtAnnotation(Results, "p").fork();
+
+        auto &B1 = getValueForDecl<BoolValue>(ASTCtx, Env, "B1");
+        auto &B2 = getValueForDecl<BoolValue>(ASTCtx, Env, "B2");
+        auto &JoinSame = getValueForDecl<BoolValue>(ASTCtx, Env, "JoinSame");
+        auto &JoinDifferent =
+            getValueForDecl<BoolValue>(ASTCtx, Env, "JoinDifferent");
+
+        EXPECT_EQ(&JoinSame, &B1);
+
+        const Formula &JoinDifferentEqB1 =
+            Env.arena().makeEquals(JoinDifferent.formula(), B1.formula());
+        EXPECT_TRUE(Env.allows(JoinDifferentEqB1));
+        EXPECT_FALSE(Env.proves(JoinDifferentEqB1));
+
+        const Formula &JoinDifferentEqB2 =
+            Env.arena().makeEquals(JoinDifferent.formula(), B2.formula());
+        EXPECT_TRUE(Env.allows(JoinDifferentEqB2));
+        EXPECT_FALSE(Env.proves(JoinDifferentEqB1));
+      });
+}
+
+TEST(TransferTest, ConditionalOperatorLocation) {
+  std::string Code = R"(
+    void target(bool Cond, int I1, int I2) {
+      int &JoinSame = Cond ? I1 : I1;
+      int &JoinDifferent = Cond ? I1 : I2;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        Environment Env = getEnvironmentAtAnnotation(Results, "p").fork();
+
+        StorageLocation &I1 = getLocForDecl(ASTCtx, Env, "I1");
+        StorageLocation &I2 = getLocForDecl(ASTCtx, Env, "I2");
+        StorageLocation &JoinSame = getLocForDecl(ASTCtx, Env, "JoinSame");
+        StorageLocation &JoinDifferent =
+            getLocForDecl(ASTCtx, Env, "JoinDifferent");
+
+        EXPECT_EQ(&JoinSame, &I1);
+
+        EXPECT_NE(&JoinDifferent, &I1);
+        EXPECT_NE(&JoinDifferent, &I2);
+      });
+}
+
 TEST(TransferTest, IfStmtBranchExtendsFlowCondition) {
   std::string Code = R"(
     void target(bool Foo) {
@@ -5492,10 +5553,7 @@ TEST(TransferTest, ContextSensitiveReturnReferenceWithConditionalOperator) {
         ASSERT_THAT(Loc, NotNull());
         EXPECT_THAT(Env.getValue(*Loc), NotNull());
 
-        // TODO: We would really like to make this stronger assertion, but that
-        // doesn't work because we don't propagate values correctly through
-        // the conditional operator yet.
-        // ASSERT_THAT(Loc, Eq(SLoc));
+        EXPECT_THAT(Loc, Eq(SLoc));
       },
       {BuiltinOptions{ContextSensitiveOptions{}}});
 }

>From 239cfa18a2509aaf66922f02a4c26c9c6d8718f5 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Thu, 18 Apr 2024 14:03:30 +0000
Subject: [PATCH 2/4] fixup! [clang][dataflow] Model conditional operator
 correctly.

---
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 37862e8695cccc..80501bdca2dc21 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -671,6 +671,9 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       if (TrueLoc == FalseLoc && TrueLoc != nullptr)
         Env.setStorageLocation(*S, *TrueLoc);
     } else if (!S->getType()->isRecordType()) {
+      // The conditional operator can evaluate to either of the values of the
+      // two branches. To model this, join these two values together to yield
+      // the result of the conditional operator.
       if (Value *Val = Environment::joinValues(
               S->getType(), TrueEnv->getValue(*S->getTrueExpr()), *TrueEnv,
               FalseEnv->getValue(*S->getFalseExpr()), *FalseEnv, Env, Model))

>From 2ae23cf097fc560f940965371f211f216103d5c9 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Fri, 19 Apr 2024 07:19:13 +0000
Subject: [PATCH 3/4] fixup! fixup! [clang][dataflow] Model conditional
 operator correctly.

---
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 80501bdca2dc21..ead9dba69d6bc5 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -661,8 +661,12 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     const Environment *TrueEnv = StmtToEnv.getEnvironment(*S->getTrueExpr());
     const Environment *FalseEnv = StmtToEnv.getEnvironment(*S->getFalseExpr());
 
-    if (TrueEnv == nullptr || FalseEnv == nullptr)
+    if (TrueEnv == nullptr || FalseEnv == nullptr) {
+      // We should always have an environment as we should visit the true /
+      // false branches before the conditional operator.
+      assert(false);
       return;
+    }
 
     if (S->isGLValue()) {
       StorageLocation *TrueLoc = TrueEnv->getStorageLocation(*S->getTrueExpr());

>From f26df9a957cfbaff33b1d5a227adf6795168bf0b Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Mon, 22 Apr 2024 06:56:23 +0000
Subject: [PATCH 4/4] fixup! fixup! fixup! [clang][dataflow] Model conditional
 operator correctly.

---
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index ead9dba69d6bc5..3fc6f5a0d80740 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -678,6 +678,18 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       // The conditional operator can evaluate to either of the values of the
       // two branches. To model this, join these two values together to yield
       // the result of the conditional operator.
+      // Note: Most joins happen in `computeBlockInputState()`, but this case is
+      // different:
+      // - `computeBlockInputState()` (which in turn calls `Environment::join()`
+      //   joins values associated with the _same_ expression or storage
+      //   location, then associates the joined value with that expression or
+      //   storage location. This join has nothing to do with transfer --
+      //   instead, it joins together the results of performing transfer on two
+      //   different blocks.
+      // - Here, we join values associated with _different_ expressions (the
+      //   true and false branch), then associate the joined value with a third
+      //   expression (the conditional operator itself). This join is what it
+      //   means to perform transfer on the conditional operator.
       if (Value *Val = Environment::joinValues(
               S->getType(), TrueEnv->getValue(*S->getTrueExpr()), *TrueEnv,
               FalseEnv->getValue(*S->getFalseExpr()), *FalseEnv, Env, Model))



More information about the cfe-commits mailing list