[clang] [analyzer] Add alpha.cplusplus.BoundsInformation checker (PR #112784)

David Kilzer via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 17 14:51:09 PDT 2024


https://github.com/ddkilzer created https://github.com/llvm/llvm-project/pull/112784

Initial version of a bounds information checker to warn when the two-argument std::span constructor has a suspicious-looking size.

>From 1da038a09f7979b4ad5b0843dc4e87b2b450fcfc Mon Sep 17 00:00:00 2001
From: David Kilzer <ddkilzer at apple.com>
Date: Thu, 17 Oct 2024 14:45:34 -0700
Subject: [PATCH] [analyzer] Add alpha.cplusplus.BoundsInformation checker

Initial version of a bounds information checker to warn when the
two-argument std::span constructor has a suspicious-looking size.
---
 .../clang/StaticAnalyzer/Checkers/Checkers.td |   5 +
 .../Checkers/BoundsInformationChecker.cpp     | 199 ++++++++++++++++++
 .../StaticAnalyzer/Checkers/CMakeLists.txt    |   1 +
 .../Analysis/bounds-information-checker.cpp   |  76 +++++++
 clang/test/Analysis/std-span-system-header.h  |  78 +++++++
 5 files changed, 359 insertions(+)
 create mode 100644 clang/lib/StaticAnalyzer/Checkers/BoundsInformationChecker.cpp
 create mode 100644 clang/test/Analysis/bounds-information-checker.cpp
 create mode 100644 clang/test/Analysis/std-span-system-header.h

diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6e224a4e098ad2..49900f36f8fd10 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -775,6 +775,11 @@ def VirtualCallChecker : Checker<"VirtualCall">,
 
 let ParentPackage = CplusplusAlpha in {
 
+def BoundsInformationChecker : Checker<"BoundsInformation">,
+  HelpText<"Confirms that view objects such as std::span are constructed "
+           "within the bounds of the source buffer">,
+  Documentation<NotDocumented>;
+
 def ContainerModeling : Checker<"ContainerModeling">,
   HelpText<"Models C++ containers">,
   Documentation<NotDocumented>,
diff --git a/clang/lib/StaticAnalyzer/Checkers/BoundsInformationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BoundsInformationChecker.cpp
new file mode 100644
index 00000000000000..8588c068e50f37
--- /dev/null
+++ b/clang/lib/StaticAnalyzer/Checkers/BoundsInformationChecker.cpp
@@ -0,0 +1,199 @@
+//== BoundsInformationChecker.cpp - bounds information checker --*- C++ -*--==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines BoundsInformationChecker, a path-sensitive checker that
+// checks that the buffer and count arguments are within the bounds of
+// the source buffer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class BoundsInformationChecker : public Checker<check::PreCall> {
+  const BugType BT_DifferentMemRegion{
+      this, "std::span constructor arguments from different sources",
+      categories::SecurityError};
+  const BugType BT_NonConstantSizeArg{
+      this,
+      "std::span constructor for std::array has non-constant size argument",
+      categories::SecurityError};
+  const BugType BT_OutOfBounds{
+      this,
+      "std::span constructor for std::array uses out-of-bounds size argument",
+      categories::SecurityError};
+  void reportBug(ExplodedNode *N, const Expr *E, CheckerContext &C,
+                 const BugType &BT, StringRef Msg) const;
+
+public:
+  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
+};
+} // end anonymous namespace
+
+void BoundsInformationChecker::reportBug(ExplodedNode *N, const Expr *E,
+                                         CheckerContext &C, const BugType &BT,
+                                         StringRef Msg) const {
+  // Generate a report for this bug.
+  auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
+  if (auto *CE = dyn_cast<CXXConstructExpr>(E)) {
+    bugreporter::trackExpressionValue(N, CE->getArg(0), *R);
+    bugreporter::trackExpressionValue(N, CE->getArg(1), *R);
+  }
+  C.emitReport(std::move(R));
+}
+
+static const MemRegion *GetRegionOrigin(SVal SV) {
+  const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true);
+  return Sym ? Sym->getOriginRegion() : nullptr;
+}
+
+static const ValueDecl *GetExpressionOrigin(const Stmt *STMT) {
+  if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(STMT)) {
+    if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+      return VD;
+  } else if (const CXXMemberCallExpr *MCE = dyn_cast<CXXMemberCallExpr>(STMT)) {
+    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(
+            MCE->getImplicitObjectArgument()->IgnoreParenCasts())) {
+      if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+        return VD;
+    } else if (const MemberExpr *ME = dyn_cast<MemberExpr>(
+                   MCE->getImplicitObjectArgument()->IgnoreParenCasts())) {
+      if (const FieldDecl *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+        return FD;
+    }
+  } else if (const CXXOperatorCallExpr *OCE =
+                 dyn_cast<CXXOperatorCallExpr>(STMT)) {
+    if (OCE->getNumArgs() >= 1) {
+      if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(OCE->getArg(0))) {
+        if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+          return VD;
+      }
+    }
+  } else if (const UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(STMT)) {
+    if (const ArraySubscriptExpr *ASExpr =
+            dyn_cast<ArraySubscriptExpr>(UnaryOp->getSubExpr())) {
+      if (const DeclRefExpr *DRE =
+              dyn_cast<DeclRefExpr>(ASExpr->getBase()->IgnoreParenCasts())) {
+        if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+          return VD;
+      }
+    }
+  } else if (const UnaryExprOrTypeTraitExpr *UTExpr =
+                 dyn_cast<UnaryExprOrTypeTraitExpr>(STMT)) {
+    if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(
+            UTExpr->getArgumentExpr()->IgnoreParenCasts())) {
+      if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl()))
+        return VD;
+    }
+  }
+  return nullptr;
+}
+
+static const ValueDecl *GetConjuredSymbolOrigin(SVal SV) {
+  const SymExpr *Sym = SV.getAsSymbol(/*IncludeBaseRegions =*/true);
+  if (const SymbolConjured *SCArg = dyn_cast_or_null<SymbolConjured>(Sym)) {
+    if (const Stmt *STMTArg = SCArg->getStmt())
+      return GetExpressionOrigin(STMTArg);
+  }
+  return nullptr;
+}
+
+void BoundsInformationChecker::checkPreCall(const CallEvent &Call,
+                                            CheckerContext &C) const {
+  // Return early if not std::span<IT>(IT, size_t) constructor.
+  // a. Check if this is a ctor for std::span.
+  CallDescription CD({CDM::CXXMethod, {"std", "span", "span"}});
+  if (!CD.matches(Call))
+    return;
+  // b. Check if std::span ctor has two arguments.
+  if (Call.getNumArgs() != 2)
+    return;
+  // c. Check if second std::span ctor argument is of type size_t.
+  if (Call.getArgExpr(1)->getType().getCanonicalType() !=
+      C.getASTContext().getSizeType())
+    return;
+
+  SVal PointerArg = Call.getArgSVal(0);
+  SVal SizeArg = Call.getArgSVal(1);
+
+  // If buffer and length params are not from the same "source", then report a
+  // bug.
+  const MemRegion *MRArg0 = GetRegionOrigin(PointerArg);
+  const MemRegion *MRArg1 = GetRegionOrigin(SizeArg);
+  if (MRArg0 && MRArg1 && MRArg0->getBaseRegion() != MRArg1->getBaseRegion()) {
+    // FIXME: Add more logic to filter out valid cases.
+    if (ExplodedNode *N = C.generateNonFatalErrorNode(C.getState())) {
+      reportBug(
+          N, Call.getOriginExpr(), C, BT_DifferentMemRegion,
+          "Constructor args for std::span are from different memory regions");
+      return;
+    }
+  }
+
+  // Check if value comes from an unknown function call.
+  const ValueDecl *VDArg0 = GetConjuredSymbolOrigin(PointerArg);
+  const ValueDecl *VDArg1 = GetConjuredSymbolOrigin(SizeArg);
+
+  if (VDArg0) {
+    // If first argument is std::array.
+    // FIXME: Support C arrays.
+    if (const auto *CRDecl0 = VDArg0->getType()->getAsCXXRecordDecl()) {
+      if (CRDecl0->isInStdNamespace() && CRDecl0->getIdentifier() &&
+          CRDecl0->getName() == "array") {
+        if (VDArg0 != VDArg1) {
+          // Check second argument against known size of std::array.
+          if (SizeArg.isConstant()) {
+            if (const auto *CTSDecl =
+                    dyn_cast<ClassTemplateSpecializationDecl>(CRDecl0)) {
+              const TemplateArgumentList &templateArgList =
+                  CTSDecl->getTemplateArgs();
+              if (templateArgList.size() == 2) {
+                const TemplateArgument &templateArg1 = templateArgList[1];
+                if (templateArg1.getKind() ==
+                        TemplateArgument::ArgKind::Integral &&
+                    *SizeArg.getAsInteger() > templateArg1.getAsIntegral()) {
+                  if (ExplodedNode *N =
+                          C.generateNonFatalErrorNode(C.getState())) {
+                    reportBug(N, Call.getOriginExpr(), C, BT_OutOfBounds,
+                              "std::span constructed with overflow length");
+                    return;
+                  }
+                }
+              }
+            }
+          } else if (ExplodedNode *N =
+                         C.generateNonFatalErrorNode(C.getState())) {
+            reportBug(N, Call.getOriginExpr(), C, BT_NonConstantSizeArg,
+                      "std::span constructed from std::array with non-constant "
+                      "length");
+            return;
+          }
+        }
+      }
+    }
+  }
+}
+
+void ento::registerBoundsInformationChecker(CheckerManager &mgr) {
+  mgr.registerChecker<BoundsInformationChecker>();
+}
+
+bool ento::shouldRegisterBoundsInformationChecker(const CheckerManager &mgr) {
+  return true;
+}
diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index 682cfa01bec963..76f8d68818bfb5 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -13,6 +13,7 @@ add_clang_library(clangStaticAnalyzerCheckers
   BitwiseShiftChecker.cpp
   BlockInCriticalSectionChecker.cpp
   BoolAssignmentChecker.cpp
+  BoundsInformationChecker.cpp
   BuiltinFunctionChecker.cpp
   CStringChecker.cpp
   CStringSyntaxChecker.cpp
diff --git a/clang/test/Analysis/bounds-information-checker.cpp b/clang/test/Analysis/bounds-information-checker.cpp
new file mode 100644
index 00000000000000..332730b0bf3386
--- /dev/null
+++ b/clang/test/Analysis/bounds-information-checker.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang %s -std=c++20 -Xclang -verify --analyze \
+// RUN:   -Xclang -analyzer-checker=core,alpha.cplusplus.BoundsInformation \
+// RUN:   -Xclang -analyzer-checker=debug.ExprInspection
+
+#include "std-span-system-header.h"
+
+//----------------------------------------------------------------------------//
+// std::span - no issue
+//----------------------------------------------------------------------------//
+
+void stdSpanFromCArrayNoIssue() {
+  char buffer[4] = { '3', '.', '1', '4' };
+
+  (void)std::span { buffer };                           // no-warning
+  (void)std::span<char> { buffer, sizeof(buffer) };     // no-warning
+  (void)std::span<char> { &buffer[0], sizeof(buffer) }; // no-warning
+
+  char *ptr = buffer;
+  size_t size = sizeof(buffer);
+  (void)std::span<char> { ptr, size };                  // no-warning
+}
+
+void stdSpanFromStdArrayNoIssue() {
+  std::array<char, 124> buffer { '3', '.', '1', '4' };
+
+  (void)std::span { buffer }.first(4);                             // no-warning
+  (void)std::span<char> { buffer.data(), buffer.size() }.first(4); // no-warning
+  (void)std::span<char> { &buffer[0], buffer.size() }.first(4);    // no-warning
+
+  char *ptr = buffer.data();
+  size_t size = buffer.size();
+  (void)std::span<char> { ptr, size }.first(4);                    // no-warning
+}
+
+void stdSpanFromStaticCArray() {
+  static const uint8_t prefixDeltaFrame[6] = { 0x00, 0x00, 0x00, 0x01, 0x21, 0xe0 };
+  (void)std::span<const uint8_t> { prefixDeltaFrame, sizeof(prefixDeltaFrame) };     // no-warning
+  (void)std::span<const uint8_t> { &prefixDeltaFrame[0], sizeof(prefixDeltaFrame) }; // no-warning
+}
+
+// No issue because arguments from the same memory region.
+// FIXME: This works locally, but not in actual WebKit code.
+class MappedFileData {
+public:
+  unsigned size() const { return m_fileSize; }
+  std::span<const uint8_t> span() const { return { static_cast<const uint8_t*>(m_fileData), size() }; } // no-warning
+  std::span<uint8_t> mutableSpan() { return { static_cast<uint8_t*>(m_fileData), size() }; }            // no-warning
+
+private:
+  void* m_fileData { nullptr };
+  unsigned m_fileSize { 0 };
+};
+
+//----------------------------------------------------------------------------//
+// std::span - warnings
+//----------------------------------------------------------------------------//
+
+void stdSpanFromStdArrayWarnings() {
+  std::array<char, 124> buffer { '3', '.', '1', '4' };
+  (void)std::span<char> { &buffer[0], 4 };
+  (void)std::span<char> { buffer.data(), 4 };
+}
+
+void stdSpanFromStdArrayOutOfBounds() {
+  std::array<char, 4> buffer { '3', '.', '1', '4' };
+  (void)std::span<char> { &buffer[0], 5 };    // expected-warning {{std::span constructed with overflow length}}
+  (void)std::span<char> { buffer.data(), 5 }; // expected-warning {{std::span constructed with overflow length}}
+}
+
+struct HexNumberBuffer {
+   std::array<char, 16> buffer;
+   unsigned length;
+
+   const char* characters() const { return &*(buffer.end() - length); }
+   std::span<const char> span() const { return { characters(), length }; } // expected-warning {{std::span constructed from std::array with non-constant length}}
+};
diff --git a/clang/test/Analysis/std-span-system-header.h b/clang/test/Analysis/std-span-system-header.h
new file mode 100644
index 00000000000000..5484bc3e8c0c4d
--- /dev/null
+++ b/clang/test/Analysis/std-span-system-header.h
@@ -0,0 +1,78 @@
+#pragma clang system_header
+
+#include "Inputs/system-header-simulator-cxx.h"
+
+namespace std {
+
+template <class _Tp, size_t _Size>
+struct array {
+  typedef array                                 __self;
+  typedef _Tp                                   value_type;
+  typedef value_type&                           reference;
+  typedef const value_type&                     const_reference;
+  typedef value_type*                           iterator;
+  typedef const value_type*                     const_iterator;
+  typedef value_type*                           pointer;
+  typedef const value_type*                     const_pointer;
+  typedef size_t                                size_type;
+
+  _Tp __elems_[_Size];
+
+  reference operator[](size_type __n);
+  const_reference operator[](size_type __n) const;
+
+  value_type* data();
+  const value_type* data() const;
+
+  size_type size() const;
+
+  iterator begin();
+  const_iterator begin() const;
+  iterator end();
+  const_iterator end() const;
+};
+
+inline constexpr size_t dynamic_extent = (size_t)-1;
+
+template <typename _Tp, size_t _Extent = dynamic_extent>
+class span {
+public:
+  using element_type           = _Tp;
+  using value_type             = remove_cv_t<_Tp>;
+  using size_type              = size_t;
+  using pointer                = _Tp *;
+
+  template <size_t _Sz = _Extent> requires(_Sz == 0)
+  constexpr span();
+
+  constexpr span(const span&);
+
+  template <typename _It>
+  constexpr span(_It* __first, size_type __count);
+
+  template <size_t _Sz>
+  constexpr span(element_type (&__arr)[_Sz]);
+
+  template <class _OtherElementType>
+  constexpr span(array<_OtherElementType, _Extent>& __arr);
+
+  template <class _OtherElementType>
+  constexpr span(const array<_OtherElementType, _Extent>& __arr);
+
+  constexpr pointer data() const;
+
+  constexpr size_type size() const;
+  constexpr size_type size_bytes() const;
+
+  constexpr span<element_type, dynamic_extent> first(size_type __count) const;
+};
+
+template<class _Tp, size_t _Sz>
+  span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>;
+
+template<class _Tp, size_t _Sz>
+  span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>;
+
+template<class _Tp, size_t _Sz>
+  span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
+}



More information about the cfe-commits mailing list