[clang] [clang-tools-extra] [clang-tidy][dataflow] Add `bugprone-null-check-after-dereference` check (PR #84166)

via cfe-commits cfe-commits at lists.llvm.org
Mon May 27 04:20:31 PDT 2024


================
@@ -0,0 +1,786 @@
+//===-- NullPointerAnalysisModel.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic null-pointer analysis model, used for finding
+// pointer null-checks after the pointer has already been dereferenced.
+//
+// Only a limited set of operations are currently recognized. Notably, pointer
+// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are
+// missing as of yet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Stmt.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/MapLattice.h"
+#include "clang/Analysis/FlowSensitive/NoopLattice.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+
+namespace clang::dataflow {
+
+namespace {
+using namespace ast_matchers;
+
+constexpr char kCond[] = "condition";
+constexpr char kVar[] = "var";
+constexpr char kValue[] = "value";
+constexpr char kIsNonnull[] = "is-nonnull";
+constexpr char kIsNull[] = "is-null";
+
+enum class SatisfiabilityResult {
+  // Returned when the value was not initialized yet.
+  Nullptr,
+  // Special value that signals that the boolean value can be anything.
+  // It signals that the underlying formulas are too complex to be calculated
+  // efficiently.
+  Top,
+  // Equivalent to the literal True in the current environment.
+  True,
+  // Equivalent to the literal False in the current environment.
+  False,
+  // Both True and False values could be produced with an appropriate set of
+  // conditions.
+  Unknown
+};
+
+enum class CompareResult {
+  Same,
+  Different,
+  Top,
+  Unknown
+};
+
+using SR = SatisfiabilityResult;
+using CR = CompareResult;
+
+// FIXME: These AST matchers should also be exported via the
+// NullPointerAnalysisModel class, for tests
+auto ptrWithBinding(llvm::StringRef VarName = kVar) {
+  return traverse(TK_IgnoreUnlessSpelledInSource,
+      expr(hasType(isAnyPointer())).bind(VarName));
+}
+
+auto derefMatcher() {
+  return unaryOperator(hasOperatorName("*"), hasUnaryOperand(ptrWithBinding()));
+}
+
+auto arrowMatcher() {
+  return memberExpr(allOf(isArrow(), hasObjectExpression(ptrWithBinding())));
+}
+
+auto castExprMatcher() {
+  return castExpr(hasCastKind(CK_PointerToBoolean),
+                  hasSourceExpression(ptrWithBinding()))
+      .bind(kCond);
+}
+
+auto nullptrMatcher(llvm::StringRef VarName = kVar) {
+  return castExpr(hasCastKind(CK_NullToPointer)).bind(VarName);
+}
+
+auto addressofMatcher() {
+  return unaryOperator(hasOperatorName("&")).bind(kVar);
+}
+
+auto functionCallMatcher() {
+  return callExpr(
+      callee(functionDecl(hasAnyParameter(anyOf(hasType(pointerType()),
+                                                hasType(referenceType()))))));
+}
+
+auto assignMatcher() {
+  return binaryOperation(isAssignmentOperator(), hasLHS(ptrWithBinding()),
+                         hasRHS(expr().bind(kValue)));
+}
+
+auto nullCheckExprMatcher() {
+  return binaryOperator(hasAnyOperatorName("==", "!="),
+                        hasOperands(ptrWithBinding(), nullptrMatcher(kValue)));
+}
+
+// FIXME: When TK_IgnoreUnlessSpelledInSource is removed from ptrWithBinding(),
+// this matcher should be merged with nullCheckExprMatcher().
+auto equalExprMatcher() {
+  return binaryOperator(hasAnyOperatorName("==", "!="),
+                        hasOperands(ptrWithBinding(kVar),
+                                    ptrWithBinding(kValue)));
+}
+
+auto anyPointerMatcher() { return expr(hasType(isAnyPointer())).bind(kVar); }
+
+// Only computes simple comparisons against the literals True and False in the
+// environment. Faster, but produces many Unknown values.
+SatisfiabilityResult shallowComputeSatisfiability(BoolValue *Val,
+                                                  const Environment &Env) {
+  if (!Val)
+    return SR::Nullptr;
+
+  if (isa<TopBoolValue>(Val))
+    return SR::Top;
+
+  if (Val == &Env.getBoolLiteralValue(true))
+    return SR::True;
+
+  if (Val == &Env.getBoolLiteralValue(false))
+    return SR::False;
+
+  return SR::Unknown;
+}
+
+// Computes satisfiability by using the flow condition. Slower, but more
+// precise.
+SatisfiabilityResult computeSatisfiability(BoolValue *Val,
+                                           const Environment &Env) {
+  // Early return with the fast compute values.
+  if (SatisfiabilityResult ShallowResult =
+          shallowComputeSatisfiability(Val, Env);
+      ShallowResult != SR::Unknown) {
+    return ShallowResult;
+  }
+
+  if (Env.proves(Val->formula()))
+    return SR::True;
+
+  if (Env.proves(Env.arena().makeNot(Val->formula())))
+    return SR::False;
+
+  return SR::Unknown;
+}
+
+inline BoolValue &getVal(llvm::StringRef Name, Value &RootValue) {
+  return *cast<BoolValue>(RootValue.getProperty(Name));
+}
+
+// Assigns initial pointer null- and nonnull-values to a given Value.
+void initializeRootValue(Value &RootValue, Environment &Env) {
+  Arena &A = Env.arena();
+
+  auto *IsNull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNull));
+  auto *IsNonnull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNonnull));
+
+  if (!IsNull) {
+    IsNull = &A.makeAtomValue();
+    RootValue.setProperty(kIsNull, *IsNull);
+  }
+
+  if (!IsNonnull) {
+    IsNonnull = &A.makeAtomValue();
+    RootValue.setProperty(kIsNonnull, *IsNonnull);
+  }
+
+  // If the pointer cannot have either a null or nonnull value, the state is
+  // unreachable.
+  // FIXME: This condition is added in all cases when getValue() is called.
+  // The reason is that on a post-visit step, the initialized Values are used,
+  // but the flow condition does not have this constraint yet.
+  // The framework provides deduplication for constraints, so should not have a
+  // performance impact.
+  Env.assume(A.makeOr(IsNull->formula(), IsNonnull->formula()));
+}
+
+void setGLValue(const Expr &E, Value &Val, Environment &Env) {
+  StorageLocation *SL = Env.getStorageLocation(E);
+  if (!SL) {
+    SL = &Env.createStorageLocation(E);
+    Env.setStorageLocation(E, *SL);
+  }
+
+  Env.setValue(*SL, Val);
+}
+
+void setUnknownValue(const Expr &E, Value &Val, Environment &Env) {
+  if (E.isGLValue())
+    setGLValue(E, Val, Env);
+  else
+    Env.setValue(E, Val);
+}
+
+Value *getValue(const Expr &Var, Environment &Env) {
+  if (Value *EnvVal = Env.getValue(Var)) {
+    // FIXME: The framework usually creates the values for us, but without the
+    // null-properties.
+    initializeRootValue(*EnvVal, Env);
+
+    return EnvVal;
+  }
+
+  Value *RootValue = Env.createValue(Var.getType());
+
+  initializeRootValue(*RootValue, Env);
+
+  setUnknownValue(Var, *RootValue, Env);
+
+  return RootValue;
+}
+
+void matchDereferenceExpr(const Stmt *stmt,
+                          const MatchFinder::MatchResult &Result,
+                          Environment &Env) {
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  Value *RootValue = getValue(*Var, Env);
+
+  BoolValue &IsNull = getVal(kIsNull, *RootValue);
+
+  if (&IsNull == &Env.makeTopBoolValue())
+    return;
+
+  Env.assume(Env.arena().makeNot(IsNull.formula()));
+}
+
+void matchNullCheckExpr(const Expr *NullCheck,
+                    const MatchFinder::MatchResult &Result,
+                    Environment &Env) {
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  // (bool)p or (p != nullptr)
+  bool IsNonnullOp = true;
+  if (auto *BinOp = dyn_cast<BinaryOperator>(NullCheck);
+      BinOp->getOpcode() == BO_EQ) {
+    IsNonnullOp = false;
+  }
+
+  Value *RootValue = getValue(*Var, Env);
+
+  Arena &A = Env.arena();
+  BoolValue &IsNonnull = getVal(kIsNonnull, *RootValue);
+  BoolValue &IsNull = getVal(kIsNull, *RootValue);
+
+  if (&IsNonnull == &Env.makeTopBoolValue() ||
+      &IsNull == &Env.makeTopBoolValue())
+    return;
+
+  BoolValue *CondValue = cast_or_null<BoolValue>(Env.getValue(*NullCheck));
+  if (!CondValue) {
+    CondValue = &A.makeAtomValue();
+    Env.setValue(*NullCheck, *CondValue);
+  }
+  const Formula &CondFormula = IsNonnullOp ? CondValue->formula()
+                                             : A.makeNot(CondValue->formula());
+
+  Env.assume(A.makeImplies(CondFormula, A.makeNot(IsNull.formula())));
+  Env.assume(A.makeImplies(A.makeNot(CondFormula),
+                           A.makeNot(IsNonnull.formula())));
+}
+
+void matchEqualExpr(const BinaryOperator *EqualExpr,
+                    const MatchFinder::MatchResult &Result,
+                    Environment &Env) {
+  bool IsNotEqualsOp = EqualExpr->getOpcode() == BO_NE;
+
+  const auto *LHSVar = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(LHSVar != nullptr);
+
+  const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+  assert(RHSVar != nullptr);
+
+  Arena &A = Env.arena();
+  Value *LHSValue = getValue(*LHSVar, Env);
+  Value *RHSValue = getValue(*RHSVar, Env);
+
+  BoolValue &LHSNonnull = getVal(kIsNonnull, *LHSValue);
+  BoolValue &LHSNull = getVal(kIsNull, *LHSValue);
+  BoolValue &RHSNonnull = getVal(kIsNonnull, *RHSValue);
+  BoolValue &RHSNull = getVal(kIsNull, *RHSValue);
+
+  if (&LHSNonnull == &Env.makeTopBoolValue() ||
+      &RHSNonnull == &Env.makeTopBoolValue() ||
+      &LHSNull == &Env.makeTopBoolValue() ||
+      &RHSNull == &Env.makeTopBoolValue())
+    return;
+
+  BoolValue *CondValue = cast_or_null<BoolValue>(Env.getValue(*EqualExpr));
+  if (!CondValue) {
+    CondValue = &A.makeAtomValue();
+    Env.setValue(*EqualExpr, *CondValue);
+  }
+
+  const Formula &CondFormula = IsNotEqualsOp ? A.makeNot(CondValue->formula())
+                                       : CondValue->formula();
+
+  // FIXME: Simplify formulas
+  // If the pointers are equal, the nullability properties are the same.
+  Env.assume(A.makeImplies(CondFormula, 
+      A.makeAnd(A.makeEquals(LHSNull.formula(), RHSNull.formula()),
+                A.makeEquals(LHSNonnull.formula(), RHSNonnull.formula()))));
+
+  // If the pointers are not equal, at most one of the pointers is null.
+  Env.assume(A.makeImplies(A.makeNot(CondFormula),
+      A.makeNot(A.makeAnd(LHSNull.formula(), RHSNull.formula()))));
+}
+
+void matchNullptrExpr(const Expr *expr, const MatchFinder::MatchResult &Result,
+                      Environment &Env) {
+  const auto *PrVar = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(PrVar != nullptr);
+
+  Value *RootValue = Env.getValue(*PrVar);
+  if (!RootValue) {
+    RootValue = Env.createValue(PrVar->getType());
+    assert(RootValue && "Failed to create nullptr value");
+    Env.setValue(*PrVar, *RootValue);
+  }
+
+  RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(true));
+  RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(false));
+}
+
+void matchAddressofExpr(const Expr *expr,
+                        const MatchFinder::MatchResult &Result,
+                        Environment &Env) {
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  Value *RootValue = Env.getValue(*Var);
+  if (!RootValue) {
+    RootValue = Env.createValue(Var->getType());
+
+    if (!RootValue)
+      return;
+
+    setUnknownValue(*Var, *RootValue, Env);
+  }
+
+  RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(false));
+  RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(true));
+}
+
+void matchPtrArgFunctionExpr(const CallExpr *fncall,
+                             const MatchFinder::MatchResult &Result,
+                             Environment &Env) {
+  for (const auto *Arg : fncall->arguments()) {
+    // FIXME: Add handling for reference types as arguments
+    if (Arg->getType()->isPointerType()) {
+      PointerValue *OuterValue = cast_or_null<PointerValue>(
+          Env.getValue(*Arg));
+
+      if (!OuterValue)
+        continue;
+
+      QualType InnerType = Arg->getType()->getPointeeType();
+      if (!InnerType->isPointerType())
+        continue;
+
+      StorageLocation &InnerLoc = OuterValue->getPointeeLoc();
+      
+      PointerValue *InnerValue =
+          cast_or_null<PointerValue>(Env.getValue(InnerLoc));
+
+      if (!InnerValue)
+        continue;
+      
+      Value *NewValue = Env.createValue(InnerType);
+      assert(NewValue && "Failed to re-initialize a pointer's value");
+
+      Env.setValue(InnerLoc, *NewValue);
+
+    // FIXME: Recursively invalidate all member pointers of eg. a struct
+    // Should be part of the framework, most likely.
+    }
+  }
+
+  if (fncall->getCallReturnType(*Result.Context)->isPointerType()) {
+    Value *RootValue = Env.createValue( 
+        fncall->getCallReturnType(*Result.Context));
+    if (!RootValue)
+      return;
+
+    setUnknownValue(*fncall, *RootValue, Env);
+  }
+}
+
+void matchAnyPointerExpr(const Expr *fncall,
+                         const MatchFinder::MatchResult &Result,
+                         Environment &Env) {
+  // This is not necessarily a prvalue, since operators such as prefix ++ are
+  // lvalues.
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  // Initialize to (Unknown, Unknown)
+  if (Env.getValue(*Var))
+    return;
+
+  Value *RootValue = Env.createValue(Var->getType());
+
+  // initializeRootValue(*RootValue, Env);
+  setUnknownValue(*Var, *RootValue, Env);
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseDerefLocation(const Expr *Deref, const MatchFinder::MatchResult &Result,
+                      NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+  auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  Value *RootValue = Env.getValue(*Var);
+  if (!RootValue)
+    return {};
+
+  // Dereferences are always the highest priority when giving a single location
+  // FIXME: Do not replace other dereferences, only other Expr's
+  auto It = ValToDerefLoc.try_emplace(RootValue, nullptr).first;
+
+  It->second = Deref;
+
+  return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseAssignLocation(const Expr *Assign,
+                       const MatchFinder::MatchResult &Result,
+                       NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+  auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+  const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+  assert(RHSVar != nullptr);
+
+  Value *RHSValue = Env.getValue(*RHSVar);
+  if (!RHSValue)
+    return {};
+
+  auto [It, Inserted] = ValToDerefLoc.try_emplace(RHSValue, nullptr);
+
+  if (Inserted)
+    It->second = Assign;
+
+  return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseNullCheckExpr(const Expr *NullCheck,
+      const MatchFinder::MatchResult &Result,
+      const NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+  auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  if (Value *RootValue = Env.getValue(*Var)) {
+    // FIXME: The framework usually creates the values for us, but without the
+    // nullability properties.
+    if (RootValue->getProperty(kIsNull) && RootValue->getProperty(kIsNonnull)) {
+      bool IsNull = Env.allows(getVal(kIsNull, *RootValue).formula());
+      bool IsNonnull = Env.allows(getVal(kIsNonnull, *RootValue).formula());
+
+      if (!IsNull && IsNonnull) {
+        // FIXME: Separate function
+        bool Inserted =
+            WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second;
+        assert(Inserted && "multiple warnings at the same source location");
+        (void)Inserted;
+
+        return {{}, {Var->getBeginLoc()}};
+      }
+
+      if (IsNull && !IsNonnull) {
+        bool Inserted =
+            WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second;
+        assert(Inserted && "multiple warnings at the same source location");
+        (void)Inserted;
+
+        return {{Var->getBeginLoc()}, {}};
+      }
+    }
+
+    // If no matches are found, the null-check itself signals a special location
+    auto [It, Inserted] = ValToDerefLoc.try_emplace(RootValue, nullptr);
+
+    if (Inserted)
+      It->second = NullCheck;
+  }
+
+  return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseEqualExpr(const Expr *PtrCheck, const MatchFinder::MatchResult &Result,
+                  NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+  auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+  const auto *LHSVar = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(LHSVar != nullptr);
+  const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+  assert(RHSVar != nullptr);
+  
+  Arena &A = Env.arena();
+  std::vector<SourceLocation> NullVarLocations;
+
+  if (Value *LHSValue = Env.getValue(*LHSVar);
+      Env.proves(A.makeNot(getVal(kIsNonnull, *LHSValue).formula()))) {
+    WarningLocToVal.try_emplace(LHSVar->getBeginLoc(), LHSValue);
+    NullVarLocations.push_back(LHSVar->getBeginLoc());
+  }
+
+  if (Value *RHSValue = Env.getValue(*RHSVar);
+      Env.proves(A.makeNot(getVal(kIsNonnull, *RHSValue).formula()))) {
+    WarningLocToVal.try_emplace(RHSVar->getBeginLoc(), RHSValue);
+    NullVarLocations.push_back(RHSVar->getBeginLoc());
+  }
+
+  return {NullVarLocations, {}};
+}
+
+auto buildTransferMatchSwitch() {
+  return CFGMatchSwitchBuilder<Environment>()
+      .CaseOfCFGStmt<Stmt>(derefMatcher(), matchDereferenceExpr)
+      .CaseOfCFGStmt<Stmt>(arrowMatcher(), matchDereferenceExpr)
+      .CaseOfCFGStmt<Expr>(nullptrMatcher(), matchNullptrExpr)
+      .CaseOfCFGStmt<Expr>(addressofMatcher(), matchAddressofExpr)
+      .CaseOfCFGStmt<CallExpr>(functionCallMatcher(), matchPtrArgFunctionExpr)
+      .CaseOfCFGStmt<Expr>(anyPointerMatcher(), matchAnyPointerExpr)
+      .CaseOfCFGStmt<Expr>(castExprMatcher(), matchNullCheckExpr)
+      .CaseOfCFGStmt<Expr>(nullCheckExprMatcher(), matchNullCheckExpr)
+      .CaseOfCFGStmt<BinaryOperator>(equalExprMatcher(), matchEqualExpr)
+      .Build();
+}
+
+auto buildBranchTransferMatchSwitch() {
+  return ASTMatchSwitchBuilder<Stmt, NullPointerAnalysisModel::TransferArgs>()
+      // .CaseOf<CastExpr>(castExprMatcher(), matchNullCheckExpr)
+      // .CaseOf<BinaryOperator>(equalExprMatcher(), matchEqualExpr)
----------------
martinboehme wrote:

These are commented out, so this transfer function currently doesn't appear to do anything.

Just get rid of this transfer function? Or is it actually needed?

https://github.com/llvm/llvm-project/pull/84166


More information about the cfe-commits mailing list