[clang] [analyzer][NFC] Simplifications in ArrayBoundV2 (PR #67572)

via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 28 04:12:31 PDT 2023


https://github.com/DonatNagyE updated https://github.com/llvm/llvm-project/pull/67572

>From 448999ecab28ea4d9768c1a8fa5fb4bb25e4bd32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Don=C3=A1t=20Nagy?= <donat.nagy at ericsson.com>
Date: Wed, 6 Sep 2023 13:39:27 +0200
Subject: [PATCH] [analyzer][NFC] Simplifications in ArrayBoundV2

I'm planning to improve diagnostics generation in `ArrayBoundCheckerV2`
but before that I'm refactoring the source code to clean up some
over-complicated code and an inaccurate comment.

Changes in this commit:
- Remove the `mutable std::unique_ptr<BugType>` boilerplate, because
  it's no longer needed.
- Remove the code duplication between the methods `reportOOB()` and
  `reportTaintedOOB()`.
- Eliminate the class `RegionRawOffsetV2` because it's just a "reinvent
  the wheel" version of `std::pair` and it was used only once, as a
  temporary object that was immediately decomposed. (I suspect that
  `RegionRawOffset` in MemRegion.cpp could also be eliminated.)
- Flatten the code of `computeOffset()` which had contained six nested
  indentation levels before this commit.
- Ensure that `computeOffset()` returns `std::nullopt` instead of a
  `{Region, <zero array index>}` pair in the case when it encounters a
  `Location` that is not an `ElementRegion`. This ensures that the
  `checkLocation` callback returns early when it handles a memory access
  where it has "nothing to do" (no subscript operation or equivalent
  pointer arithmetic). (Note that this is still NFC because zero is a
  valid index everywhere, so the old logic without this shortcut
  eventually reached the same conclusion.)
- Correct a wrong explanation comment in `getSimplifiedOffsets()`.
---
 .../Checkers/ArrayBoundCheckerV2.cpp          | 221 +++++++-----------
 1 file changed, 86 insertions(+), 135 deletions(-)

diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index db4a2fcea9b2cdd..e52eef0fe94370b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,15 +32,13 @@ using namespace taint;
 namespace {
 class ArrayBoundCheckerV2 :
     public Checker<check::Location> {
-  mutable std::unique_ptr<BugType> BT;
-  mutable std::unique_ptr<BugType> TaintBT;
+  BugType BT{this, "Out-of-bound access"};
+  BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
 
-  enum OOB_Kind { OOB_Precedes, OOB_Excedes };
+  enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
 
-  void reportOOB(CheckerContext &C, ProgramStateRef errorState,
-                 OOB_Kind kind) const;
-  void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
-                      SVal TaintedSVal) const;
+  void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
+                 SVal TaintedSVal = UnknownVal()) const;
 
   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
 
@@ -48,26 +46,58 @@ class ArrayBoundCheckerV2 :
   void checkLocation(SVal l, bool isLoad, const Stmt *S,
                      CheckerContext &C) const;
 };
+} // anonymous namespace
 
-// FIXME: Eventually replace RegionRawOffset with this class.
-class RegionRawOffsetV2 {
-private:
-  const SubRegion *baseRegion;
-  NonLoc byteOffset;
+/// For a given Location that can be represented as a symbolic expression
+/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
+/// Arr and the distance of Location from the beginning of Arr (expressed in a
+/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
+/// cannot be determined.
+std::optional<std::pair<const SubRegion *, NonLoc>>
+computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
+  QualType T = SVB.getArrayIndexType();
+  auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
+    // We will use this utility to add and multiply values.
+    return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
+  };
 
-public:
-  RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
-      : baseRegion(base), byteOffset(offset) { assert(base); }
+  const auto *Region = Location.getAsRegion()->getAs<SubRegion>();
+  std::optional<NonLoc> Offset = std::nullopt;
+
+  while (const auto *ERegion = dyn_cast_or_null<ElementRegion>(Region)) {
+    const auto Index = ERegion->getIndex().getAs<NonLoc>();
+    if (!Index)
+      return std::nullopt;
+
+    QualType ElemType = ERegion->getElementType();
+    // If the element is an incomplete type, go no further.
+    if (ElemType->isIncompleteType())
+      return std::nullopt;
+
+    // Calculate Delta = Index * sizeof(ElemType).
+    NonLoc Size = SVB.makeArrayIndex(
+        SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
+    auto Delta = Calc(BO_Mul, *Index, Size);
+    if (!Delta)
+      return std::nullopt;
+
+    // Perform Offset += Delta, handling the initial nullopt as 0.
+    if (!Offset) {
+      Offset = Delta;
+    } else {
+      Offset = Calc(BO_Add, *Offset, *Delta);
+      if (!Offset)
+        return std::nullopt;
+    }
 
-  NonLoc getByteOffset() const { return byteOffset; }
-  const SubRegion *getRegion() const { return baseRegion; }
+    // Continute the offset calculations with the SuperRegion.
+    Region = ERegion->getSuperRegion()->getAs<SubRegion>();
+  }
 
-  static std::optional<RegionRawOffsetV2>
-  computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location);
+  if (Region && Offset)
+    return std::make_pair(Region, *Offset);
 
-  void dump() const;
-  void dumpToStream(raw_ostream &os) const;
-};
+  return std::nullopt;
 }
 
 // TODO: once the constraint manager is smart enough to handle non simplified
@@ -86,8 +116,8 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
           APSIntType(extent.getValue()).convert(SIE->getRHS());
       switch (SIE->getOpcode()) {
       case BO_Mul:
-        // The constant should never be 0 here, since it the result of scaling
-        // based on the size of a type which is never 0.
+        // The constant should never be 0 here, becasue multiplication by zero
+        // is simplified by the engine.
         if ((extent.getValue() % constant) != 0)
           return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
         else
@@ -163,16 +193,15 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
     return;
 
   ProgramStateRef state = checkerContext.getState();
-
   SValBuilder &svalBuilder = checkerContext.getSValBuilder();
-  const std::optional<RegionRawOffsetV2> &RawOffset =
-      RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
+
+  const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
+      computeOffset(state, svalBuilder, location);
 
   if (!RawOffset)
     return;
 
-  NonLoc ByteOffset = RawOffset->getByteOffset();
-  const SubRegion *Reg = RawOffset->getRegion();
+  auto [Reg, ByteOffset] = *RawOffset;
 
   // CHECK LOWER BOUND
   const MemSpaceRegion *Space = Reg->getMemorySpace();
@@ -207,12 +236,13 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
     if (state_exceedsUpperBound) {
       if (!state_withinUpperBound) {
         // We know that the index definitely exceeds the upper bound.
-        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
         return;
       }
       if (isTainted(state, ByteOffset)) {
         // Both cases are possible, but the index is tainted, so report.
-        reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
+        reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
+                  ByteOffset);
         return;
       }
     }
@@ -224,58 +254,40 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
   checkerContext.addTransition(state);
 }
 
-void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
-                                         ProgramStateRef errorState,
-                                         SVal TaintedSVal) const {
-  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
-  if (!errorNode)
-    return;
-
-  if (!TaintBT)
-    TaintBT.reset(
-        new BugType(this, "Out-of-bound access", categories::TaintedData));
-
-  SmallString<256> buf;
-  llvm::raw_svector_ostream os(buf);
-  os << "Out of bound memory access (index is tainted)";
-  auto BR =
-      std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
-
-  // Track back the propagation of taintedness.
-  for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
-    BR->markInteresting(Sym);
-  }
-
-  checkerContext.emitReport(std::move(BR));
-}
-
-void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
-                                    ProgramStateRef errorState,
-                                    OOB_Kind kind) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
+                                    ProgramStateRef ErrorState, OOB_Kind Kind,
+                                    SVal TaintedSVal) const {
 
-  ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
-  if (!errorNode)
+  ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
+  if (!ErrorNode)
     return;
 
-  if (!BT)
-    BT.reset(new BugType(this, "Out-of-bound access"));
+  // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
+  // a follow-up commit.
 
-  // FIXME: This diagnostics are preliminary.  We should get far better
-  // diagnostics for explaining buffer overruns.
+  SmallString<128> Buf;
+  llvm::raw_svector_ostream Out(Buf);
+  Out << "Out of bound memory access ";
 
-  SmallString<256> buf;
-  llvm::raw_svector_ostream os(buf);
-  os << "Out of bound memory access ";
-  switch (kind) {
+  switch (Kind) {
   case OOB_Precedes:
-    os << "(accessed memory precedes memory block)";
+    Out << "(accessed memory precedes memory block)";
+    break;
+  case OOB_Exceeds:
+    Out << "(access exceeds upper limit of memory block)";
     break;
-  case OOB_Excedes:
-    os << "(access exceeds upper limit of memory block)";
+  case OOB_Taint:
+    Out << "(index is tainted)";
     break;
   }
-  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
-  checkerContext.emitReport(std::move(BR));
+  auto BR = std::make_unique<PathSensitiveBugReport>(
+      Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
+  // Track back the propagation of taintedness, or do nothing if TaintedSVal is
+  // the default UnknownVal().
+  for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
+    BR->markInteresting(Sym);
+  }
+  C.emitReport(std::move(BR));
 }
 
 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
@@ -297,67 +309,6 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
           (MacroName == "isupper") || (MacroName == "isxdigit"));
 }
 
-#ifndef NDEBUG
-LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
-  dumpToStream(llvm::errs());
-}
-
-void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
-  os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
-}
-#endif
-
-/// For a given Location that can be represented as a symbolic expression
-/// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
-/// Arr and the distance of Location from the beginning of Arr (expressed in a
-/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
-/// cannot be determined.
-std::optional<RegionRawOffsetV2>
-RegionRawOffsetV2::computeOffset(ProgramStateRef State, SValBuilder &SVB,
-                                 SVal Location) {
-  QualType T = SVB.getArrayIndexType();
-  auto Calc = [&SVB, State, T](BinaryOperatorKind Op, NonLoc LHS, NonLoc RHS) {
-    // We will use this utility to add and multiply values.
-    return SVB.evalBinOpNN(State, Op, LHS, RHS, T).getAs<NonLoc>();
-  };
-
-  const MemRegion *Region = Location.getAsRegion();
-  NonLoc Offset = SVB.makeZeroArrayIndex();
-
-  while (Region) {
-    if (const auto *ERegion = dyn_cast<ElementRegion>(Region)) {
-      if (const auto Index = ERegion->getIndex().getAs<NonLoc>()) {
-        QualType ElemType = ERegion->getElementType();
-        // If the element is an incomplete type, go no further.
-        if (ElemType->isIncompleteType())
-          return std::nullopt;
-
-        // Perform Offset += Index * sizeof(ElemType); then continue the offset
-        // calculations with SuperRegion:
-        NonLoc Size = SVB.makeArrayIndex(
-            SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
-        if (auto Delta = Calc(BO_Mul, *Index, Size)) {
-          if (auto NewOffset = Calc(BO_Add, Offset, *Delta)) {
-            Offset = *NewOffset;
-            Region = ERegion->getSuperRegion();
-            continue;
-          }
-        }
-      }
-    } else if (const auto *SRegion = dyn_cast<SubRegion>(Region)) {
-      // NOTE: The dyn_cast<>() is expected to succeed, it'd be very surprising
-      // to see a MemSpaceRegion at this point.
-      // FIXME: We may return with {<Region>, 0} even if we didn't handle any
-      // ElementRegion layers. I think that this behavior was introduced
-      // accidentally by 8a4c760c204546aba566e302f299f7ed2e00e287 in 2011, so
-      // it may be useful to review it in the future.
-      return RegionRawOffsetV2(SRegion, Offset);
-    }
-    return std::nullopt;
-  }
-  return std::nullopt;
-}
-
 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
   mgr.registerChecker<ArrayBoundCheckerV2>();
 }



More information about the cfe-commits mailing list