[clang] fe06a6d - Reland: [clang] Diagnose dangling issues for the "Container<GSLPointer>" case. #107213 (#108344)

via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 25 05:12:53 PDT 2024


Author: Haojian Wu
Date: 2024-09-25T14:12:49+02:00
New Revision: fe06a6daae6be85d47cd1e51654e91f9ac6e63d7

URL: https://github.com/llvm/llvm-project/commit/fe06a6daae6be85d47cd1e51654e91f9ac6e63d7
DIFF: https://github.com/llvm/llvm-project/commit/fe06a6daae6be85d47cd1e51654e91f9ac6e63d7.diff

LOG: Reland: [clang] Diagnose dangling issues for the "Container<GSLPointer>" case. #107213 (#108344)

This relands #107213, with with fixes to address false positives
(`make_optional(nullptr)`).

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/AttrDocs.td
    clang/lib/Sema/CheckExprLifetime.cpp
    clang/test/Sema/warn-lifetime-analysis-nocfg.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5923888383022a..14907e7db18de3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -336,6 +336,8 @@ Improvements to Clang's diagnostics
   local variables passed to function calls using the ``[[clang::musttail]]``
   attribute.
 
+- Clang now diagnoses cases where a dangling ``GSLOwner<GSLPointer>`` object is constructed, e.g. ``std::vector<string_view> v = {std::string()};`` (#GH100526).
+
 Improvements to Clang's time-trace
 ----------------------------------
 

diff  --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index f23a148e546fa3..53d88482698f00 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -6696,6 +6696,20 @@ When the Owner's lifetime ends, it will consider the Pointer to be dangling.
     P.getInt(); // P is dangling
   }
 
+If a template class is annotated with ``[[gsl::Owner]]``, and the first
+instantiated template argument is a pointer type (raw pointer, or ``[[gsl::Pointer]]``),
+the analysis will consider the instantiated class as a container of the pointer.
+When constructing such an object from a GSL owner object, the analysis will
+assume that the container holds a pointer to the owner object. Consequently,
+when the owner object is destroyed, the pointer will be considered dangling.
+
+.. code-block:: c++
+
+   int f() {
+     std::vector<std::string_view> v = {std::string()}; // v holds a dangling pointer.
+     std::optional<std::string_view> o = std::string(); // o holds a dangling pointer.
+   }
+
 }];
 }
 

diff  --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index e9e39c11ffbaab..009b8d000e6b0e 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -271,6 +271,49 @@ static bool isInStlNamespace(const Decl *D) {
   return DC->isStdNamespace();
 }
 
+static bool isPointerLikeType(QualType Type) {
+  return isRecordWithAttr<PointerAttr>(Type) || Type->isPointerType() ||
+         Type->isNullPtrType();
+}
+
+// Returns true if the given Record decl is a form of `GSLOwner<Pointer>`
+// type, e.g. std::vector<string_view>, std::optional<string_view>.
+static bool isContainerOfPointer(const RecordDecl *Container) {
+  if (const auto *CTSD =
+          dyn_cast_if_present<ClassTemplateSpecializationDecl>(Container)) {
+    if (!CTSD->hasAttr<OwnerAttr>()) // Container must be a GSL owner type.
+      return false;
+    const auto &TAs = CTSD->getTemplateArgs();
+    return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type &&
+           isPointerLikeType(TAs[0].getAsType());
+  }
+  return false;
+}
+static bool isContainerOfOwner(const RecordDecl *Container) {
+  const auto *CTSD =
+      dyn_cast_if_present<ClassTemplateSpecializationDecl>(Container);
+  if (!CTSD)
+    return false;
+  if (!CTSD->hasAttr<OwnerAttr>()) // Container must be a GSL owner type.
+    return false;
+  const auto &TAs = CTSD->getTemplateArgs();
+  return TAs.size() > 0 && TAs[0].getKind() == TemplateArgument::Type &&
+         isRecordWithAttr<OwnerAttr>(TAs[0].getAsType());
+}
+
+// Returns true if the given Record is `std::initializer_list<pointer>`.
+static bool isStdInitializerListOfPointer(const RecordDecl *RD) {
+  if (const auto *CTSD =
+          dyn_cast_if_present<ClassTemplateSpecializationDecl>(RD)) {
+    const auto &TAs = CTSD->getTemplateArgs();
+    return isInStlNamespace(RD) && RD->getIdentifier() &&
+           RD->getName() == "initializer_list" && TAs.size() > 0 &&
+           TAs[0].getKind() == TemplateArgument::Type &&
+           isPointerLikeType(TAs[0].getAsType());
+  }
+  return false;
+}
+
 static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
   if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
     if (isRecordWithAttr<PointerAttr>(Conv->getConversionType()) &&
@@ -282,8 +325,7 @@ static bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
           Callee->getFunctionObjectParameterType()) &&
       !isRecordWithAttr<OwnerAttr>(Callee->getFunctionObjectParameterType()))
     return false;
-  if (Callee->getReturnType()->isPointerType() ||
-      isRecordWithAttr<PointerAttr>(Callee->getReturnType())) {
+  if (isPointerLikeType(Callee->getReturnType())) {
     if (!Callee->getIdentifier())
       return false;
     return llvm::StringSwitch<bool>(Callee->getName())
@@ -331,6 +373,103 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) {
   return false;
 }
 
+// Returns true if the given constructor is a copy-like constructor, such as
+// `Ctor(Owner<U>&&)` or `Ctor(const Owner<U>&)`.
+static bool isCopyLikeConstructor(const CXXConstructorDecl *Ctor) {
+  if (!Ctor || Ctor->param_size() != 1)
+    return false;
+  const auto *ParamRefType =
+      Ctor->getParamDecl(0)->getType()->getAs<ReferenceType>();
+  if (!ParamRefType)
+    return false;
+
+  // Check if the first parameter type is "Owner<U>".
+  if (const auto *TST =
+          ParamRefType->getPointeeType()->getAs<TemplateSpecializationType>())
+    return TST->getTemplateName()
+        .getAsTemplateDecl()
+        ->getTemplatedDecl()
+        ->hasAttr<OwnerAttr>();
+  return false;
+}
+
+// Returns true if we should perform the GSL analysis on the first argument for
+// the given constructor.
+static bool
+shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) {
+  const auto *LHSRecordDecl = Ctor->getConstructor()->getParent();
+
+  // Case 1, construct a GSL pointer, e.g. std::string_view
+  // Always inspect when LHS is a pointer.
+  if (LHSRecordDecl->hasAttr<PointerAttr>())
+    return true;
+
+  if (Ctor->getConstructor()->getNumParams() != 1 ||
+      !isContainerOfPointer(LHSRecordDecl))
+    return false;
+
+  // Now, the LHS is an Owner<Pointer> type, e.g., std::vector<string_view>.
+  //
+  // At a high level, we cannot precisely determine what the nested pointer
+  // owns. However, by analyzing the RHS owner type, we can use heuristics to
+  // infer ownership information. These heuristics are designed to be
+  // conservative, minimizing false positives while still providing meaningful
+  // diagnostics.
+  //
+  // While this inference isn't perfect, it helps catch common use-after-free
+  // patterns.
+  auto RHSArgType = Ctor->getArg(0)->getType();
+  const auto *RHSRD = RHSArgType->getAsRecordDecl();
+  // LHS is constructed from an intializer_list.
+  //
+  // std::initializer_list is a proxy object that provides access to the backing
+  // array. We perform analysis on it to determine if there are any dangling
+  // temporaries in the backing array.
+  // E.g. std::vector<string_view> abc = {string()};
+  if (isStdInitializerListOfPointer(RHSRD))
+    return true;
+
+  // RHS must be an owner.
+  if (!isRecordWithAttr<OwnerAttr>(RHSArgType))
+    return false;
+
+  // Bail out if the RHS is Owner<Pointer>.
+  //
+  // We cannot reliably determine what the LHS nested pointer owns -- it could
+  // be the entire RHS or the nested pointer in RHS. To avoid false positives,
+  // we skip this case, such as:
+  //   std::stack<std::string_view> s(std::deque<std::string_view>{});
+  //
+  // TODO: this also has a false negative, it doesn't catch the case like:
+  //   std::optional<span<int*>> os = std::vector<int*>{}
+  if (isContainerOfPointer(RHSRD))
+    return false;
+
+  // Assume that the nested Pointer is constructed from the nested Owner.
+  // E.g. std::optional<string_view> sv = std::optional<string>(s);
+  if (isContainerOfOwner(RHSRD))
+    return true;
+
+  // Now, the LHS is an Owner<Pointer> and the RHS is an Owner<X>,  where X is
+  // neither an `Owner` nor a `Pointer`.
+  //
+  // Use the constructor's signature as a hint. If it is a copy-like constructor
+  // `Owner1<Pointer>(Owner2<X>&&)`, we assume that the nested pointer is
+  // constructed from X. In such cases, we do not diagnose, as `X` is not an
+  // owner, e.g.
+  //   std::optional<string_view> sv = std::optional<Foo>();
+  if (const auto *PrimaryCtorTemplate =
+          Ctor->getConstructor()->getPrimaryTemplate();
+      PrimaryCtorTemplate &&
+      isCopyLikeConstructor(dyn_cast_if_present<CXXConstructorDecl>(
+          PrimaryCtorTemplate->getTemplatedDecl()))) {
+    return false;
+  }
+  // Assume that the nested pointer is constructed from the whole RHS.
+  // E.g. optional<string_view> s = std::string();
+  return true;
+}
+
 // Return true if this is an "normal" assignment operator.
 // We assuments that a normal assingment operator always returns *this, that is,
 // an lvalue reference that is the same type as the implicit object parameter
@@ -473,12 +612,12 @@ static void visitFunctionCallArguments(IndirectLocalPath &Path, Expr *Call,
     if (CheckCoroCall || Callee->getParamDecl(I)->hasAttr<LifetimeBoundAttr>())
       VisitLifetimeBoundArg(Callee->getParamDecl(I), Args[I]);
     else if (EnableGSLAnalysis && I == 0) {
+      // Perform GSL analysis for the first argument
       if (shouldTrackFirstArgument(Callee)) {
         VisitGSLPointerArg(Callee, Args[0]);
-      } else if (auto *CCE = dyn_cast<CXXConstructExpr>(Call);
-                 CCE &&
-                 CCE->getConstructor()->getParent()->hasAttr<PointerAttr>()) {
-        VisitGSLPointerArg(CCE->getConstructor(), Args[0]);
+      } else if (auto *Ctor = dyn_cast<CXXConstructExpr>(Call);
+                 Ctor && shouldTrackFirstArgumentForConstructor(Ctor)) {
+        VisitGSLPointerArg(Ctor->getConstructor(), Args[0]);
       }
     }
   }

diff  --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
index 69e5395a78a57e..c6272a775a28fa 100644
--- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
+++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp
@@ -158,17 +158,30 @@ auto begin(C &c) -> decltype(c.begin());
 template<typename T, int N>
 T *begin(T (&array)[N]);
 
+using size_t = decltype(sizeof(0));
+
+template<typename T>
+struct initializer_list {
+  const T* ptr; size_t sz;
+};
 template <typename T>
 struct vector {
   typedef __gnu_cxx::basic_iterator<T> iterator;
   iterator begin();
   iterator end();
   const T *data() const;
+  vector();
+  vector(initializer_list<T> __l);
+
+  template<typename InputIterator>
+	vector(InputIterator first, InputIterator __last);
+
   T &at(int n);
 };
 
 template<typename T>
 struct basic_string_view {
+  basic_string_view();
   basic_string_view(const T *);
   const T *begin() const;
 };
@@ -203,11 +216,21 @@ template<typename T>
 struct optional {
   optional();
   optional(const T&);
+
+  template<typename U = T>
+  optional(U&& t);
+
+  template<typename U>
+  optional(optional<U>&& __t);
+
   T &operator*() &;
   T &&operator*() &&;
   T &value() &;
   T &&value() &&;
 };
+template<typename T>
+optional<__decay(T)> make_optional(T&&);
+
 
 template<typename T>
 struct stack {
@@ -587,3 +610,170 @@ std::string_view test2() {
   return k.value(); // expected-warning {{address of stack memory associated}}
 }
 } // namespace GH108272
+
+namespace GH100526 {
+void test() {
+  std::vector<std::string_view> v1({std::string()}); // expected-warning {{object backing the pointer will be destroyed at the end}}
+  std::vector<std::string_view> v2({
+    std::string(), // expected-warning {{object backing the pointer will be destroyed at the end}}
+    std::string_view()
+  });
+  std::vector<std::string_view> v3({
+    std::string_view(),
+    std::string()  // expected-warning {{object backing the pointer will be destroyed at the end}}
+  });
+
+  std::optional<std::string_view> o1 = std::string(); // expected-warning {{object backing the pointer}}
+
+  std::string s;
+  // This is a tricky use-after-free case, what it does:
+  //   1. make_optional creates a temporary "optional<string>"" object
+  //   2. the temporary object owns the underlying string which is copied from s.
+  //   3. the t3 object holds the view to the underlying string of the temporary object.
+  std::optional<std::string_view> o2 = std::make_optional(s); // expected-warning {{object backing the pointer}}
+  std::optional<std::string_view> o3 = std::optional<std::string>(s); // expected-warning {{object backing the pointer}}
+  std::optional<std::string_view> o4 = std::optional<std::string_view>(s);
+
+  // FIXME: should work for assignment cases
+  v1 = {std::string()};
+  o1 = std::string();
+
+  // no warning on copying pointers.
+  std::vector<std::string_view> n1 = {std::string_view()};
+  std::optional<std::string_view> n2 = {std::string_view()};
+  std::optional<std::string_view> n3 = std::string_view();
+  std::optional<std::string_view> n4 = std::make_optional(std::string_view());
+  const char* b = "";
+  std::optional<std::string_view> n5 = std::make_optional(b);
+  std::optional<std::string_view> n6 = std::make_optional("test");
+}
+
+std::vector<std::string_view> test2(int i) {
+  std::vector<std::string_view> t;
+  if (i)
+    return t; // this is fine, no dangling
+  return std::vector<std::string_view>(t.begin(), t.end());
+}
+
+class Foo {
+  public:
+   operator std::string_view() const { return ""; }
+};
+class [[gsl::Owner]] FooOwner {
+  public:
+   operator std::string_view() const { return ""; }
+};
+std::optional<Foo> GetFoo();
+std::optional<FooOwner> GetFooOwner();
+
+template <typename T>
+struct [[gsl::Owner]] Container1 {
+   Container1();
+};
+template <typename T>
+struct [[gsl::Owner]] Container2 {
+  template<typename U>
+  Container2(const Container1<U>& C2);
+};
+
+std::optional<std::string_view> test3(int i) {
+  std::string s;
+  std::string_view sv;
+  if (i)
+   return s; // expected-warning {{address of stack memory associated}}
+  return sv; // fine
+  Container2<std::string_view> c1 = Container1<Foo>(); // no diagnostic as Foo is not an Owner.
+  Container2<std::string_view> c2 = Container1<FooOwner>(); // expected-warning {{object backing the pointer will be destroyed}}
+  return GetFoo(); // fine, we don't know Foo is owner or not, be conservative.
+  return GetFooOwner(); // expected-warning {{returning address of local temporary object}}
+}
+
+std::optional<int*> test4(int a) {
+  return std::make_optional(nullptr); // fine
+}
+
+
+template <typename T>
+struct [[gsl::Owner]] StatusOr {
+  const T &valueLB() const [[clang::lifetimebound]];
+  const T &valueNoLB() const;
+};
+
+template<typename T>
+struct [[gsl::Pointer]] Span {
+  Span(const std::vector<T> &V);
+
+  const int& getFieldLB() const [[clang::lifetimebound]];
+  const int& getFieldNoLB() const;
+};
+
+
+/////// From Owner<Pointer> ///////
+
+// Pointer from Owner<Pointer>
+std::string_view test5() {
+  std::string_view a = StatusOr<std::string_view>().valueLB(); // expected-warning {{object backing the pointer will be dest}}
+return StatusOr<std::string_view>().valueLB(); // expected-warning {{returning address of local temporary}}
+
+  // No dangling diagnostics on non-lifetimebound methods.
+  std::string_view b = StatusOr<std::string_view>().valueNoLB();
+  return StatusOr<std::string_view>().valueNoLB();
+}
+
+// Pointer<Pointer> from Owner<Pointer>
+// Prevent regression GH108463
+Span<int*> test6(std::vector<int*> v) {
+  Span<int *> dangling = std::vector<int*>(); // expected-warning {{object backing the pointer}}
+  return v; // expected-warning {{address of stack memory}}
+}
+
+/////// From Owner<Owner<Pointer>> ///////
+
+// Pointer from Owner<Owner<Pointer>>
+int* test7(StatusOr<StatusOr<int*>> aa) {
+  // No dangling diagnostic on pointer.
+  return aa.valueLB().valueLB(); // OK.
+}
+
+// Owner<Pointer> from Owner<Owner<Pointer>>
+std::vector<int*> test8(StatusOr<std::vector<int*>> aa) {
+  return aa.valueLB(); // OK, no pointer being construct on this case.
+  return aa.valueNoLB();
+}
+
+// Pointer<Pointer> from Owner<Owner<Pointer>>
+Span<int*> test9(StatusOr<std::vector<int*>> aa) {
+  return aa.valueLB(); // expected-warning {{address of stack memory associated}}
+  return aa.valueNoLB(); // OK.
+}
+
+/////// From Owner<Owner> ///////
+
+// Pointer<Owner>> from Owner<Owner>
+Span<std::string> test10(StatusOr<std::vector<std::string>> aa) {
+  return aa.valueLB(); // expected-warning {{address of stack memory}}
+  return aa.valueNoLB(); // OK.
+}
+
+/////// From Owner<Pointer<Owner>> ///////
+
+// Pointer<Owner>> from Owner<Pointer<Owner>>
+Span<std::string> test11(StatusOr<Span<std::string>> aa) {
+  return aa.valueLB(); // expected-warning {{address of stack memory}}
+  return aa.valueNoLB(); // OK.
+}
+
+// Lifetimebound and gsl::Pointer.
+const int& test12(Span<int> a) {
+  return a.getFieldLB(); // expected-warning {{reference to stack memory associated}}
+  return a.getFieldNoLB(); // OK.
+}
+
+void test13() {
+  // FIXME: RHS is Owner<Pointer>, we skip this case to avoid false positives.
+  std::optional<Span<int*>> abc = std::vector<int*>{};
+
+  std::optional<Span<int>> t = std::vector<int> {}; // expected-warning {{object backing the pointer will be destroyed}}
+}
+
+} // namespace GH100526


        


More information about the cfe-commits mailing list