[clang] [clang][analyzer] Introduce MutexModeling checker (PR #111381)

Tue Oct 8 06:44:37 PDT 2024

================
@@ -0,0 +1,773 @@
+//===--- MutexModeling.cpp - Modeling of mutexes --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines modeling checker for tracking mutex states.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MutexModeling/MutexModelingAPI.h"
+#include "MutexModeling/MutexModelingDomain.h"
+#include "MutexModeling/MutexRegionExtractor.h"
+
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
+#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
+#include <memory>
+
+using namespace clang;
+using namespace ento;
+using namespace mutex_modeling;
+
+namespace {
+
+// When a lock is destroyed, in some semantics(like PthreadSemantics) we are not
+// sure if the destroy call has succeeded or failed, and the lock enters one of
+// the 'possibly destroyed' state. There is a short time frame for the
+// programmer to check the return value to see if the lock was successfully
+// destroyed. Before we model the next operation over that lock, we call this
+// function to see if the return value was checked by now and set the lock state
+// - either to destroyed state or back to its previous state.
+
+// In PthreadSemantics, pthread_mutex_destroy() returns zero if the lock is
+// successfully destroyed and it returns a non-zero value otherwise.
+ProgramStateRef resolvePossiblyDestroyedMutex(ProgramStateRef State,
+                                              const MemRegion *LockReg,
+                                              const SymbolRef *LockReturnSym) {
+  const LockStateKind *LockState = State->get<LockStates>(LockReg);
+  // Existence in DestroyRetVal ensures existence in LockMap.
+  // Existence in Destroyed also ensures that the lock state for lockR is either
+  // UntouchedAndPossiblyDestroyed or UnlockedAndPossiblyDestroyed.
+  assert(LockState);
+  assert(*LockState == LockStateKind::UntouchedAndPossiblyDestroyed ||
+         *LockState == LockStateKind::UnlockedAndPossiblyDestroyed);
+
+  ConstraintManager &CMgr = State->getConstraintManager();
+  ConditionTruthVal RetZero = CMgr.isNull(State, *LockReturnSym);
+  if (RetZero.isConstrainedFalse()) {
+    switch (*LockState) {
+    case LockStateKind::UntouchedAndPossiblyDestroyed: {
+      State = State->remove<LockStates>(LockReg);
+      break;
+    }
+    case LockStateKind::UnlockedAndPossiblyDestroyed: {
+      State = State->set<LockStates>(LockReg, LockStateKind::Unlocked);
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown lock state for a lock inside DestroyRetVal");
+    }
+  } else {
+    State = State->set<LockStates>(LockReg, LockStateKind::Destroyed);
+  }
+
+  // Removing the map entry (LockReg, sym) from DestroyRetVal as the lock
+  // state is now resolved.
+  return State->remove<DestroyedRetVals>(LockReg);
+}
+
+ProgramStateRef doResolvePossiblyDestroyedMutex(ProgramStateRef State,
+                                                const MemRegion *MTX) {
+  assert(MTX && "should only be called with a mutex region");
+
+  if (const SymbolRef *Sym = State->get<DestroyedRetVals>(MTX))
+    return resolvePossiblyDestroyedMutex(State, MTX, Sym);
+  return State;
+}
+
+class MutexModeling : public Checker<check::PostCall, check::DeadSymbols,
+                                     check::RegionChanges> {
+public:
+  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
+
+  void checkDeadSymbols(SymbolReaper &SymReaper, CheckerContext &C) const;
+
+  ProgramStateRef
+  checkRegionChanges(ProgramStateRef State, const InvalidatedSymbols *Symbols,
+                     ArrayRef<const MemRegion *> ExplicitRegions,
+                     ArrayRef<const MemRegion *> Regions,
+                     const LocationContext *LCtx, const CallEvent *Call) const;
+
+private:
+  mutable std::unique_ptr<BugType> BT_initlock;
+
+  // When handling events, NoteTags can be placed on ProgramPoints. This struct
+  // supports returning both the resulting ProgramState and a NoteTag.
+  struct ModelingResult {
+    ProgramStateRef State;
+    const NoteTag *Note = nullptr;
+  };
+
+  ModelingResult handleInit(const EventDescriptor &Event, const MemRegion *MTX,
+                            const CallEvent &Call, ProgramStateRef State,
+                            CheckerContext &C) const;
+  ModelingResult onSuccessfulAcquire(const MemRegion *MTX,
+                                     const CallEvent &Call,
+                                     ProgramStateRef State,
+                                     CheckerContext &C) const;
+  ModelingResult markCritSection(ModelingResult InputState,
+                                 const MemRegion *MTX, const CallEvent &Call,
+                                 CheckerContext &C) const;
+  ModelingResult handleAcquire(const EventDescriptor &Event,
+                               const MemRegion *MTX, const CallEvent &Call,
+                               ProgramStateRef State, CheckerContext &C) const;
+  ModelingResult handleTryAcquire(const EventDescriptor &Event,
+                                  const MemRegion *MTX, const CallEvent &Call,
+                                  ProgramStateRef State,
+                                  CheckerContext &C) const;
+  ModelingResult handleRelease(const EventDescriptor &Event,
+                               const MemRegion *MTX, const CallEvent &Call,
+                               ProgramStateRef State, CheckerContext &C) const;
+  ModelingResult handleDestroy(const EventDescriptor &Event,
+                               const MemRegion *MTX, const CallEvent &Call,
+                               ProgramStateRef State, CheckerContext &C) const;
+  ModelingResult handleEvent(const EventDescriptor &Event, const MemRegion *MTX,
+                             const CallEvent &Call, ProgramStateRef State,
+                             CheckerContext &C) const;
+};
+
+} // namespace
+
+MutexModeling::ModelingResult
+MutexModeling::handleInit(const EventDescriptor &Event, const MemRegion *MTX,
+                          const CallEvent &Call, ProgramStateRef State,
+                          CheckerContext &C) const {
+  ModelingResult Result{State->set<LockStates>(MTX, LockStateKind::Unlocked)};
+
+  const LockStateKind *LockState = State->get<LockStates>(MTX);
+
+  if (!LockState)
+    return Result;
+
+  switch (*LockState) {
+  case (LockStateKind::Destroyed): {
+    Result.State = State->set<LockStates>(MTX, LockStateKind::Unlocked);
+    break;
+  }
+  case (LockStateKind::Locked): {
+    Result.State =
+        State->set<LockStates>(MTX, LockStateKind::Error_DoubleInitWhileLocked);
+    break;
+  }
+  default: {
+    Result.State = State->set<LockStates>(MTX, LockStateKind::Error_DoubleInit);
+    break;
+  }
+  }
+
+  return Result;
+}
+
+MutexModeling::ModelingResult
+MutexModeling::markCritSection(MutexModeling::ModelingResult InputState,
+                               const MemRegion *MTX, const CallEvent &Call,
+                               CheckerContext &C) const {
+  const CritSectionMarker MarkToAdd{Call.getOriginExpr(), MTX};
+  return {InputState.State->add<CritSections>(MarkToAdd),
+          CreateMutexCritSectionNote(MarkToAdd, C)};
+}
+
+MutexModeling::ModelingResult
+MutexModeling::onSuccessfulAcquire(const MemRegion *MTX, const CallEvent &Call,
+                                   ProgramStateRef State,
+                                   CheckerContext &C) const {
+  ModelingResult Result{State};
+
+  const LockStateKind *LockState = State->get<LockStates>(MTX);
+
+  if (!LockState) {
+    Result.State = Result.State->set<LockStates>(MTX, LockStateKind::Locked);
+  } else {
+    switch (*LockState) {
+    case LockStateKind::Unlocked:
+      Result.State = Result.State->set<LockStates>(MTX, LockStateKind::Locked);
+      break;
+    case LockStateKind::Locked:
+      Result.State =
+          Result.State->set<LockStates>(MTX, LockStateKind::Error_DoubleLock);
+      break;
+    case LockStateKind::Destroyed:
+      Result.State = Result.State->set<LockStates>(
+          MTX, LockStateKind::Error_LockDestroyed);
+      break;
+    default:
+      break;
+    }
+  }
+
+  Result = markCritSection(Result, MTX, Call, C);
+  return Result;
+}
+
+MutexModeling::ModelingResult
+MutexModeling::handleAcquire(const EventDescriptor &Event, const MemRegion *MTX,
+                             const CallEvent &Call, ProgramStateRef State,
+                             CheckerContext &C) const {
+
+  switch (Event.Semantics) {
+  case SemanticsKind::PthreadSemantics: {
+    // Assume that the return value was 0.
+    SVal RetVal = Call.getReturnValue();
+    if (auto DefinedRetVal = RetVal.getAs<DefinedSVal>()) {
+      // FIXME: If the lock function was inlined and returned true,
+      // we need to behave sanely - at least generate sink.
+      State = State->assume(*DefinedRetVal, false);
+      assert(State);
+    }
+    // We might want to handle the case when the mutex lock function was
+    // inlined and returned an Unknown or Undefined value.
+    break;
+  }
+  case SemanticsKind::XNUSemantics:
+    // XNU semantics return void on non-try locks.
+    break;
+  default:
+    llvm_unreachable(
+        "Acquire events should have either Pthread or XNU semantics");
+  }
+
+  return onSuccessfulAcquire(MTX, Call, State, C);
+}
+
+MutexModeling::ModelingResult MutexModeling::handleTryAcquire(
+    const EventDescriptor &Event, const MemRegion *MTX, const CallEvent &Call,
+    ProgramStateRef State, CheckerContext &C) const {
+
+  ProgramStateRef LockSucc{State};
+  // Bifurcate the state, and allow a mode where the lock acquisition fails.
+  SVal RetVal = Call.getReturnValue();
+  std::optional<DefinedSVal> DefinedRetVal = RetVal.getAs<DefinedSVal>();
+  // Bifurcating the state is only meaningful if the call was not inlined, but
+  // we can still reason about the return value.
+  if (!C.wasInlined && DefinedRetVal) {
+    ProgramStateRef LockFail;
+    switch (Event.Semantics) {
+    case SemanticsKind::PthreadSemantics:
+      // For PthreadSemantics, a non-zero return value indicates success
+      std::tie(LockFail, LockSucc) = State->assume(*DefinedRetVal);
+      break;
+    case SemanticsKind::XNUSemantics:
+      // For XNUSemantics, a zero return value indicates success
+      std::tie(LockSucc, LockFail) = State->assume(*DefinedRetVal);
+      break;
+    default:
+      llvm_unreachable("Unknown TryLock locking semantics");
+    }
+
+    // This is the bifurcation point in the ExplodedGraph, we do not need to
+    // return the new ExplodedGraph node because we do not plan on building this
+    // lock-failed case path in this checker.
+    C.addTransition(LockFail);
+  }
+
+  if (!LockSucc)
+    LockSucc = State;
----------------
NagyDonat wrote:

This `if` is surprising for me -- it essentially says that if the return value is defined and the `assume` says that it's never a success, then we still go forward on a branch where we assume that it's a success.

If you think that this is the right idea here, then please explain it briefly.

https://github.com/llvm/llvm-project/pull/111381