[clang] [clang-tools-extra] [clang-tidy][dataflow] Add `bugprone-null-check-after-dereference` check (PR #84166)

Dmitri Gribenko via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 6 10:33:00 PST 2024


================
@@ -0,0 +1,625 @@
+//===-- NullPointerAnalysisModel.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic null-pointer analysis model, used for finding
+// pointer null-checks after the pointer has already been dereferenced.
+//
+// Only a limited set of operations are currently recognized. Notably, pointer
+// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are
+// missing as of yet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Stmt.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/MapLattice.h"
+#include "clang/Analysis/FlowSensitive/NoopLattice.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+
+namespace clang::dataflow {
+
+namespace {
+using namespace ast_matchers;
+
+constexpr char kCond[] = "condition";
+constexpr char kVar[] = "var";
+constexpr char kValue[] = "value";
+constexpr char kIsNonnull[] = "is-nonnull";
+constexpr char kIsNull[] = "is-null";
+
+enum class SatisfiabilityResult {
+  // Returned when the value was not initialized yet.
+  Nullptr,
+  // Special value that signals that the boolean value can be anything.
+  // It signals that the underlying formulas are too complex to be calculated
+  // efficiently.
+  Top,
+  // Equivalent to the literal True in the current environment.
+  True,
+  // Equivalent to the literal False in the current environment.
+  False,
+  // Both True and False values could be produced with an appropriate set of
+  // conditions.
+  Unknown
+};
+
+using SR = SatisfiabilityResult;
+
+// FIXME: These AST matchers should also be exported via the
+// NullPointerAnalysisModel class, for tests
+auto ptrToVar(llvm::StringRef VarName = kVar) {
+  return traverse(TK_IgnoreUnlessSpelledInSource,
+                  declRefExpr(hasType(isAnyPointer())).bind(VarName));
+}
+
+auto derefMatcher() {
+  return traverse(
+      TK_IgnoreUnlessSpelledInSource,
+      unaryOperator(hasOperatorName("*"), hasUnaryOperand(ptrToVar())));
+}
+
+auto arrowMatcher() {
+  return traverse(
+      TK_IgnoreUnlessSpelledInSource,
+      memberExpr(allOf(isArrow(), hasObjectExpression(ptrToVar()))));
+}
+
+auto castExprMatcher() {
+  return castExpr(hasCastKind(CK_PointerToBoolean),
+                  hasSourceExpression(ptrToVar()))
+      .bind(kCond);
+}
+
+auto nullptrMatcher() {
+  return castExpr(hasCastKind(CK_NullToPointer)).bind(kVar);
+}
+
+auto addressofMatcher() {
+  return unaryOperator(hasOperatorName("&")).bind(kVar);
+}
+
+auto functionCallMatcher() {
+  return callExpr(hasDeclaration(functionDecl(returns(isAnyPointer()))))
+      .bind(kVar);
+}
+
+auto assignMatcher() {
+  return binaryOperation(isAssignmentOperator(), hasLHS(ptrToVar()),
+                         hasRHS(expr().bind(kValue)));
+}
+
+auto anyPointerMatcher() { return expr(hasType(isAnyPointer())).bind(kVar); }
+
+// Only computes simple comparisons against the literals True and False in the
+// environment. Faster, but produces many Unknown values.
+SatisfiabilityResult shallowComputeSatisfiability(BoolValue *Val,
+                                                  const Environment &Env) {
+  if (!Val)
+    return SR::Nullptr;
+
+  if (isa<TopBoolValue>(Val))
+    return SR::Top;
+
+  if (Val == &Env.getBoolLiteralValue(true))
+    return SR::True;
+
+  if (Val == &Env.getBoolLiteralValue(false))
+    return SR::False;
+
+  return SR::Unknown;
+}
+
+// Computes satisfiability by using the flow condition. Slower, but more
+// precise.
+SatisfiabilityResult computeSatisfiability(BoolValue *Val,
+                                           const Environment &Env) {
+  // Early return with the fast compute values.
+  if (SatisfiabilityResult ShallowResult =
+          shallowComputeSatisfiability(Val, Env);
+      ShallowResult != SR::Unknown) {
+    return ShallowResult;
+  }
+
+  if (Env.proves(Val->formula()))
+    return SR::True;
+
+  if (Env.proves(Env.arena().makeNot(Val->formula())))
+    return SR::False;
+
+  return SR::Unknown;
+}
+
+inline BoolValue &getVal(llvm::StringRef Name, Value &RootValue) {
+  return *cast<BoolValue>(RootValue.getProperty(Name));
+}
+
+// Assigns initial pointer null- and nonnull-values to a given Value.
+void initializeRootValue(Value &RootValue, Environment &Env) {
+  Arena &A = Env.arena();
+
+  BoolValue *IsNull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNull));
+  BoolValue *IsNonnull =
+      cast_or_null<BoolValue>(RootValue.getProperty(kIsNonnull));
+
+  if (!IsNull) {
+    IsNull = &A.makeAtomValue();
+    RootValue.setProperty(kIsNull, *IsNull);
+  }
+
+  if (!IsNonnull) {
+    IsNonnull = &A.makeAtomValue();
+    RootValue.setProperty(kIsNonnull, *IsNonnull);
+  }
+
+  // If the pointer cannot have either a null or nonull value, the state is
+  // unreachable.
+  // FIXME: This condition is added in all cases when getValue() is called.
+  // The reason is that on a post-visit step, the initialized Values are used,
+  // but the flow condition does not have this constraint yet.
+  // The framework provides deduplication for constraints, so should not have a
+  // performance impact.
+  Env.assume(A.makeOr(IsNull->formula(), IsNonnull->formula()));
+}
+
+void setGLValue(const Expr &E, Value &Val, Environment &Env) {
+  StorageLocation *SL = Env.getStorageLocation(E);
+  if (!SL) {
+    SL = &Env.createStorageLocation(E);
+    Env.setStorageLocation(E, *SL);
+  }
+
+  Env.setValue(*SL, Val);
+}
+
+void setUnknownValue(const Expr &E, Value &Val, Environment &Env) {
+  if (E.isGLValue())
+    setGLValue(E, Val, Env);
+  else if (E.isPRValue())
+    Env.setValue(E, Val);
+  else
+    llvm_unreachable("all value cases covered");
+}
+
+Value *getValue(const Expr &Var, Environment &Env) {
+  if (auto *EnvVal = Env.getValue(Var)) {
+    // FIXME: The framework usually creates the values for us, but without the
+    // null-properties.
+    initializeRootValue(*EnvVal, Env);
+
+    return EnvVal;
+  }
+
+  auto *RootValue = Env.createValue(Var.getType());
+
+  if (!RootValue)
+    return nullptr;
+
+  initializeRootValue(*RootValue, Env);
+
+  setGLValue(Var, *RootValue, Env);
+
+  return RootValue;
+}
+
+void matchDereferenceExpr(const Stmt *stmt,
+                          const MatchFinder::MatchResult &Result,
+                          Environment &Env) {
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  auto *RootValue = getValue(*Var, Env);
+  if (!RootValue) {
+    return;
+  }
+
+  Env.assume(Env.arena().makeNot(getVal(kIsNull, *RootValue).formula()));
+}
+
+void matchCastExpr(const CastExpr *cond, const MatchFinder::MatchResult &Result,
+                   NullPointerAnalysisModel::TransferArgs &Data) {
+  auto [IsNonnullBranch, Env] = Data;
+
+  const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+  assert(Var != nullptr);
+
+  auto *RootValue = getValue(*Var, Env);
+  if (!RootValue) {
+    return;
+  }
+
+  auto *NewRootValue = Env.createValue(Var->getType());
+  if (!NewRootValue)
+    return;
+
+  setGLValue(*Var, *NewRootValue, Env);
+
+  Arena &A = Env.arena();
+  BoolValue &IsNonnull = getVal(kIsNonnull, *RootValue);
+  BoolValue &IsNull = getVal(kIsNull, *RootValue);
+
+  if (IsNonnullBranch) {
+    Env.assume(A.makeNot(IsNull.formula()));
+    NewRootValue->setProperty(kIsNull, Env.getBoolLiteralValue(false));
+
+    NewRootValue->setProperty(kIsNonnull, IsNonnull);
+  } else {
+    NewRootValue->setProperty(kIsNull, IsNull);
+
+    Env.assume(A.makeNot(IsNonnull.formula()));
+    NewRootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(false));
+  }
----------------
gribozavr wrote:

I think this function could be significantly simplified.

You should check if the Expr argument of the cast has a modeled pointer value, and if it is, then one of the booleans that you're storing as synthetic properties on that pointer value would be the result of the pointer to bool cast.

See https://github.com/google/crubit/blob/085108ea0a3b8da2e400613d7d235274de1f34b9/nullability/pointer_nullability_analysis.cc#L822 for a sample transfer function for a cast.

If not all variables reliably get a modeled pointer value (I'm guessing this is the case because a good chunk of this transfer function is dedicated to creating values), then that problem should be solved uniformly for all cases. PTAL at `unpackPointerValue`, `initPointerNullState` and `initPointerFromTypeNullability` in our code, and how they are used - specifically `transferValue_Pointer` and that it is called for every pointer-typed expression, not just variables. For the initialization of synthetic properties (`kIsNull` etc.) the idea is to call initialization in a central place, so that each individual transfer function does not need to worry about it.

`unpackPointerValue` also handles "top" and I think your code would benefit from something similar, as it leads to significantly better convergence of loop analysis.


https://github.com/llvm/llvm-project/pull/84166


More information about the cfe-commits mailing list