[clang] [clang-tools-extra] [clang-tidy][dataflow] Add `bugprone-null-check-after-dereference` check (PR #84166)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 27 04:20:31 PDT 2024
================
@@ -0,0 +1,786 @@
+//===-- NullPointerAnalysisModel.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic null-pointer analysis model, used for finding
+// pointer null-checks after the pointer has already been dereferenced.
+//
+// Only a limited set of operations are currently recognized. Notably, pointer
+// arithmetic, null-pointer assignments and _nullable/_nonnull attributes are
+// missing as of yet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Models/NullPointerAnalysisModel.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/AST/Stmt.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/CFGMatchSwitch.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/MapLattice.h"
+#include "clang/Analysis/FlowSensitive/NoopLattice.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+
+namespace clang::dataflow {
+
+namespace {
+using namespace ast_matchers;
+
+constexpr char kCond[] = "condition";
+constexpr char kVar[] = "var";
+constexpr char kValue[] = "value";
+constexpr char kIsNonnull[] = "is-nonnull";
+constexpr char kIsNull[] = "is-null";
+
+enum class SatisfiabilityResult {
+ // Returned when the value was not initialized yet.
+ Nullptr,
+ // Special value that signals that the boolean value can be anything.
+ // It signals that the underlying formulas are too complex to be calculated
+ // efficiently.
+ Top,
+ // Equivalent to the literal True in the current environment.
+ True,
+ // Equivalent to the literal False in the current environment.
+ False,
+ // Both True and False values could be produced with an appropriate set of
+ // conditions.
+ Unknown
+};
+
+enum class CompareResult {
+ Same,
+ Different,
+ Top,
+ Unknown
+};
+
+using SR = SatisfiabilityResult;
+using CR = CompareResult;
+
+// FIXME: These AST matchers should also be exported via the
+// NullPointerAnalysisModel class, for tests
+auto ptrWithBinding(llvm::StringRef VarName = kVar) {
+ return traverse(TK_IgnoreUnlessSpelledInSource,
+ expr(hasType(isAnyPointer())).bind(VarName));
+}
+
+auto derefMatcher() {
+ return unaryOperator(hasOperatorName("*"), hasUnaryOperand(ptrWithBinding()));
+}
+
+auto arrowMatcher() {
+ return memberExpr(allOf(isArrow(), hasObjectExpression(ptrWithBinding())));
+}
+
+auto castExprMatcher() {
+ return castExpr(hasCastKind(CK_PointerToBoolean),
+ hasSourceExpression(ptrWithBinding()))
+ .bind(kCond);
+}
+
+auto nullptrMatcher(llvm::StringRef VarName = kVar) {
+ return castExpr(hasCastKind(CK_NullToPointer)).bind(VarName);
+}
+
+auto addressofMatcher() {
+ return unaryOperator(hasOperatorName("&")).bind(kVar);
+}
+
+auto functionCallMatcher() {
+ return callExpr(
+ callee(functionDecl(hasAnyParameter(anyOf(hasType(pointerType()),
+ hasType(referenceType()))))));
+}
+
+auto assignMatcher() {
+ return binaryOperation(isAssignmentOperator(), hasLHS(ptrWithBinding()),
+ hasRHS(expr().bind(kValue)));
+}
+
+auto nullCheckExprMatcher() {
+ return binaryOperator(hasAnyOperatorName("==", "!="),
+ hasOperands(ptrWithBinding(), nullptrMatcher(kValue)));
+}
+
+// FIXME: When TK_IgnoreUnlessSpelledInSource is removed from ptrWithBinding(),
+// this matcher should be merged with nullCheckExprMatcher().
+auto equalExprMatcher() {
+ return binaryOperator(hasAnyOperatorName("==", "!="),
+ hasOperands(ptrWithBinding(kVar),
+ ptrWithBinding(kValue)));
+}
+
+auto anyPointerMatcher() { return expr(hasType(isAnyPointer())).bind(kVar); }
+
+// Only computes simple comparisons against the literals True and False in the
+// environment. Faster, but produces many Unknown values.
+SatisfiabilityResult shallowComputeSatisfiability(BoolValue *Val,
+ const Environment &Env) {
+ if (!Val)
+ return SR::Nullptr;
+
+ if (isa<TopBoolValue>(Val))
+ return SR::Top;
+
+ if (Val == &Env.getBoolLiteralValue(true))
+ return SR::True;
+
+ if (Val == &Env.getBoolLiteralValue(false))
+ return SR::False;
+
+ return SR::Unknown;
+}
+
+// Computes satisfiability by using the flow condition. Slower, but more
+// precise.
+SatisfiabilityResult computeSatisfiability(BoolValue *Val,
+ const Environment &Env) {
+ // Early return with the fast compute values.
+ if (SatisfiabilityResult ShallowResult =
+ shallowComputeSatisfiability(Val, Env);
+ ShallowResult != SR::Unknown) {
+ return ShallowResult;
+ }
+
+ if (Env.proves(Val->formula()))
+ return SR::True;
+
+ if (Env.proves(Env.arena().makeNot(Val->formula())))
+ return SR::False;
+
+ return SR::Unknown;
+}
+
+inline BoolValue &getVal(llvm::StringRef Name, Value &RootValue) {
+ return *cast<BoolValue>(RootValue.getProperty(Name));
+}
+
+// Assigns initial pointer null- and nonnull-values to a given Value.
+void initializeRootValue(Value &RootValue, Environment &Env) {
+ Arena &A = Env.arena();
+
+ auto *IsNull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNull));
+ auto *IsNonnull = cast_or_null<BoolValue>(RootValue.getProperty(kIsNonnull));
+
+ if (!IsNull) {
+ IsNull = &A.makeAtomValue();
+ RootValue.setProperty(kIsNull, *IsNull);
+ }
+
+ if (!IsNonnull) {
+ IsNonnull = &A.makeAtomValue();
+ RootValue.setProperty(kIsNonnull, *IsNonnull);
+ }
+
+ // If the pointer cannot have either a null or nonnull value, the state is
+ // unreachable.
+ // FIXME: This condition is added in all cases when getValue() is called.
+ // The reason is that on a post-visit step, the initialized Values are used,
+ // but the flow condition does not have this constraint yet.
+ // The framework provides deduplication for constraints, so should not have a
+ // performance impact.
+ Env.assume(A.makeOr(IsNull->formula(), IsNonnull->formula()));
+}
+
+void setGLValue(const Expr &E, Value &Val, Environment &Env) {
+ StorageLocation *SL = Env.getStorageLocation(E);
+ if (!SL) {
+ SL = &Env.createStorageLocation(E);
+ Env.setStorageLocation(E, *SL);
+ }
+
+ Env.setValue(*SL, Val);
+}
+
+void setUnknownValue(const Expr &E, Value &Val, Environment &Env) {
+ if (E.isGLValue())
+ setGLValue(E, Val, Env);
+ else
+ Env.setValue(E, Val);
+}
+
+Value *getValue(const Expr &Var, Environment &Env) {
+ if (Value *EnvVal = Env.getValue(Var)) {
+ // FIXME: The framework usually creates the values for us, but without the
+ // null-properties.
+ initializeRootValue(*EnvVal, Env);
+
+ return EnvVal;
+ }
+
+ Value *RootValue = Env.createValue(Var.getType());
+
+ initializeRootValue(*RootValue, Env);
+
+ setUnknownValue(Var, *RootValue, Env);
+
+ return RootValue;
+}
+
+void matchDereferenceExpr(const Stmt *stmt,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ Value *RootValue = getValue(*Var, Env);
+
+ BoolValue &IsNull = getVal(kIsNull, *RootValue);
+
+ if (&IsNull == &Env.makeTopBoolValue())
+ return;
+
+ Env.assume(Env.arena().makeNot(IsNull.formula()));
+}
+
+void matchNullCheckExpr(const Expr *NullCheck,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ // (bool)p or (p != nullptr)
+ bool IsNonnullOp = true;
+ if (auto *BinOp = dyn_cast<BinaryOperator>(NullCheck);
+ BinOp->getOpcode() == BO_EQ) {
+ IsNonnullOp = false;
+ }
+
+ Value *RootValue = getValue(*Var, Env);
+
+ Arena &A = Env.arena();
+ BoolValue &IsNonnull = getVal(kIsNonnull, *RootValue);
+ BoolValue &IsNull = getVal(kIsNull, *RootValue);
+
+ if (&IsNonnull == &Env.makeTopBoolValue() ||
+ &IsNull == &Env.makeTopBoolValue())
+ return;
+
+ BoolValue *CondValue = cast_or_null<BoolValue>(Env.getValue(*NullCheck));
+ if (!CondValue) {
+ CondValue = &A.makeAtomValue();
+ Env.setValue(*NullCheck, *CondValue);
+ }
+ const Formula &CondFormula = IsNonnullOp ? CondValue->formula()
+ : A.makeNot(CondValue->formula());
+
+ Env.assume(A.makeImplies(CondFormula, A.makeNot(IsNull.formula())));
+ Env.assume(A.makeImplies(A.makeNot(CondFormula),
+ A.makeNot(IsNonnull.formula())));
+}
+
+void matchEqualExpr(const BinaryOperator *EqualExpr,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ bool IsNotEqualsOp = EqualExpr->getOpcode() == BO_NE;
+
+ const auto *LHSVar = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(LHSVar != nullptr);
+
+ const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+ assert(RHSVar != nullptr);
+
+ Arena &A = Env.arena();
+ Value *LHSValue = getValue(*LHSVar, Env);
+ Value *RHSValue = getValue(*RHSVar, Env);
+
+ BoolValue &LHSNonnull = getVal(kIsNonnull, *LHSValue);
+ BoolValue &LHSNull = getVal(kIsNull, *LHSValue);
+ BoolValue &RHSNonnull = getVal(kIsNonnull, *RHSValue);
+ BoolValue &RHSNull = getVal(kIsNull, *RHSValue);
+
+ if (&LHSNonnull == &Env.makeTopBoolValue() ||
+ &RHSNonnull == &Env.makeTopBoolValue() ||
+ &LHSNull == &Env.makeTopBoolValue() ||
+ &RHSNull == &Env.makeTopBoolValue())
+ return;
+
+ BoolValue *CondValue = cast_or_null<BoolValue>(Env.getValue(*EqualExpr));
+ if (!CondValue) {
+ CondValue = &A.makeAtomValue();
+ Env.setValue(*EqualExpr, *CondValue);
+ }
+
+ const Formula &CondFormula = IsNotEqualsOp ? A.makeNot(CondValue->formula())
+ : CondValue->formula();
+
+ // FIXME: Simplify formulas
+ // If the pointers are equal, the nullability properties are the same.
+ Env.assume(A.makeImplies(CondFormula,
+ A.makeAnd(A.makeEquals(LHSNull.formula(), RHSNull.formula()),
+ A.makeEquals(LHSNonnull.formula(), RHSNonnull.formula()))));
+
+ // If the pointers are not equal, at most one of the pointers is null.
+ Env.assume(A.makeImplies(A.makeNot(CondFormula),
+ A.makeNot(A.makeAnd(LHSNull.formula(), RHSNull.formula()))));
+}
+
+void matchNullptrExpr(const Expr *expr, const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ const auto *PrVar = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(PrVar != nullptr);
+
+ Value *RootValue = Env.getValue(*PrVar);
+ if (!RootValue) {
+ RootValue = Env.createValue(PrVar->getType());
+ assert(RootValue && "Failed to create nullptr value");
+ Env.setValue(*PrVar, *RootValue);
+ }
+
+ RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(true));
+ RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(false));
+}
+
+void matchAddressofExpr(const Expr *expr,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ Value *RootValue = Env.getValue(*Var);
+ if (!RootValue) {
+ RootValue = Env.createValue(Var->getType());
+
+ if (!RootValue)
+ return;
+
+ setUnknownValue(*Var, *RootValue, Env);
+ }
+
+ RootValue->setProperty(kIsNull, Env.getBoolLiteralValue(false));
+ RootValue->setProperty(kIsNonnull, Env.getBoolLiteralValue(true));
+}
+
+void matchPtrArgFunctionExpr(const CallExpr *fncall,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ for (const auto *Arg : fncall->arguments()) {
+ // FIXME: Add handling for reference types as arguments
+ if (Arg->getType()->isPointerType()) {
+ PointerValue *OuterValue = cast_or_null<PointerValue>(
+ Env.getValue(*Arg));
+
+ if (!OuterValue)
+ continue;
+
+ QualType InnerType = Arg->getType()->getPointeeType();
+ if (!InnerType->isPointerType())
+ continue;
+
+ StorageLocation &InnerLoc = OuterValue->getPointeeLoc();
+
+ PointerValue *InnerValue =
+ cast_or_null<PointerValue>(Env.getValue(InnerLoc));
+
+ if (!InnerValue)
+ continue;
+
+ Value *NewValue = Env.createValue(InnerType);
+ assert(NewValue && "Failed to re-initialize a pointer's value");
+
+ Env.setValue(InnerLoc, *NewValue);
+
+ // FIXME: Recursively invalidate all member pointers of eg. a struct
+ // Should be part of the framework, most likely.
+ }
+ }
+
+ if (fncall->getCallReturnType(*Result.Context)->isPointerType()) {
+ Value *RootValue = Env.createValue(
+ fncall->getCallReturnType(*Result.Context));
+ if (!RootValue)
+ return;
+
+ setUnknownValue(*fncall, *RootValue, Env);
+ }
+}
+
+void matchAnyPointerExpr(const Expr *fncall,
+ const MatchFinder::MatchResult &Result,
+ Environment &Env) {
+ // This is not necessarily a prvalue, since operators such as prefix ++ are
+ // lvalues.
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ // Initialize to (Unknown, Unknown)
+ if (Env.getValue(*Var))
+ return;
+
+ Value *RootValue = Env.createValue(Var->getType());
+
+ // initializeRootValue(*RootValue, Env);
+ setUnknownValue(*Var, *RootValue, Env);
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseDerefLocation(const Expr *Deref, const MatchFinder::MatchResult &Result,
+ NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+ auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ Value *RootValue = Env.getValue(*Var);
+ if (!RootValue)
+ return {};
+
+ // Dereferences are always the highest priority when giving a single location
+ // FIXME: Do not replace other dereferences, only other Expr's
+ auto It = ValToDerefLoc.try_emplace(RootValue, nullptr).first;
+
+ It->second = Deref;
+
+ return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseAssignLocation(const Expr *Assign,
+ const MatchFinder::MatchResult &Result,
+ NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+ auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+ const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+ assert(RHSVar != nullptr);
+
+ Value *RHSValue = Env.getValue(*RHSVar);
+ if (!RHSValue)
+ return {};
+
+ auto [It, Inserted] = ValToDerefLoc.try_emplace(RHSValue, nullptr);
+
+ if (Inserted)
+ It->second = Assign;
+
+ return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseNullCheckExpr(const Expr *NullCheck,
+ const MatchFinder::MatchResult &Result,
+ const NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+ auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+ const auto *Var = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(Var != nullptr);
+
+ if (Value *RootValue = Env.getValue(*Var)) {
+ // FIXME: The framework usually creates the values for us, but without the
+ // nullability properties.
+ if (RootValue->getProperty(kIsNull) && RootValue->getProperty(kIsNonnull)) {
+ bool IsNull = Env.allows(getVal(kIsNull, *RootValue).formula());
+ bool IsNonnull = Env.allows(getVal(kIsNonnull, *RootValue).formula());
+
+ if (!IsNull && IsNonnull) {
+ // FIXME: Separate function
+ bool Inserted =
+ WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second;
+ assert(Inserted && "multiple warnings at the same source location");
+ (void)Inserted;
+
+ return {{}, {Var->getBeginLoc()}};
+ }
+
+ if (IsNull && !IsNonnull) {
+ bool Inserted =
+ WarningLocToVal.try_emplace(Var->getBeginLoc(), RootValue).second;
+ assert(Inserted && "multiple warnings at the same source location");
+ (void)Inserted;
+
+ return {{Var->getBeginLoc()}, {}};
+ }
+ }
+
+ // If no matches are found, the null-check itself signals a special location
+ auto [It, Inserted] = ValToDerefLoc.try_emplace(RootValue, nullptr);
+
+ if (Inserted)
+ It->second = NullCheck;
+ }
+
+ return {};
+}
+
+NullCheckAfterDereferenceDiagnoser::ResultType
+diagnoseEqualExpr(const Expr *PtrCheck, const MatchFinder::MatchResult &Result,
+ NullCheckAfterDereferenceDiagnoser::DiagnoseArgs &Data) {
+ auto [ValToDerefLoc, WarningLocToVal, Env] = Data;
+
+ const auto *LHSVar = Result.Nodes.getNodeAs<Expr>(kVar);
+ assert(LHSVar != nullptr);
+ const auto *RHSVar = Result.Nodes.getNodeAs<Expr>(kValue);
+ assert(RHSVar != nullptr);
+
+ Arena &A = Env.arena();
+ std::vector<SourceLocation> NullVarLocations;
+
+ if (Value *LHSValue = Env.getValue(*LHSVar);
+ Env.proves(A.makeNot(getVal(kIsNonnull, *LHSValue).formula()))) {
+ WarningLocToVal.try_emplace(LHSVar->getBeginLoc(), LHSValue);
+ NullVarLocations.push_back(LHSVar->getBeginLoc());
+ }
+
+ if (Value *RHSValue = Env.getValue(*RHSVar);
+ Env.proves(A.makeNot(getVal(kIsNonnull, *RHSValue).formula()))) {
+ WarningLocToVal.try_emplace(RHSVar->getBeginLoc(), RHSValue);
+ NullVarLocations.push_back(RHSVar->getBeginLoc());
+ }
+
+ return {NullVarLocations, {}};
+}
+
+auto buildTransferMatchSwitch() {
+ return CFGMatchSwitchBuilder<Environment>()
+ .CaseOfCFGStmt<Stmt>(derefMatcher(), matchDereferenceExpr)
+ .CaseOfCFGStmt<Stmt>(arrowMatcher(), matchDereferenceExpr)
+ .CaseOfCFGStmt<Expr>(nullptrMatcher(), matchNullptrExpr)
+ .CaseOfCFGStmt<Expr>(addressofMatcher(), matchAddressofExpr)
+ .CaseOfCFGStmt<CallExpr>(functionCallMatcher(), matchPtrArgFunctionExpr)
+ .CaseOfCFGStmt<Expr>(anyPointerMatcher(), matchAnyPointerExpr)
+ .CaseOfCFGStmt<Expr>(castExprMatcher(), matchNullCheckExpr)
+ .CaseOfCFGStmt<Expr>(nullCheckExprMatcher(), matchNullCheckExpr)
+ .CaseOfCFGStmt<BinaryOperator>(equalExprMatcher(), matchEqualExpr)
+ .Build();
+}
+
+auto buildBranchTransferMatchSwitch() {
+ return ASTMatchSwitchBuilder<Stmt, NullPointerAnalysisModel::TransferArgs>()
+ // .CaseOf<CastExpr>(castExprMatcher(), matchNullCheckExpr)
+ // .CaseOf<BinaryOperator>(equalExprMatcher(), matchEqualExpr)
----------------
martinboehme wrote:
These are commented out, so this transfer function currently doesn't appear to do anything.
Just get rid of this transfer function? Or is it actually needed?
https://github.com/llvm/llvm-project/pull/84166
More information about the cfe-commits
mailing list