[libcxx-commits] [libcxxabi] [libc++abi] Handle null pointer-to-object: Issue #64953 (PR #68076)

Iain Sandoe via libcxx-commits libcxx-commits at lists.llvm.org
Fri Oct 6 15:17:55 PDT 2023


https://github.com/iains updated https://github.com/llvm/llvm-project/pull/68076

>From 95c21fc6cc28560b35ce97bad83fddc0e4a419d7 Mon Sep 17 00:00:00 2001
From: Iain Sandoe <iain at sandoe.co.uk>
Date: Thu, 24 Aug 2023 21:05:33 +0100
Subject: [PATCH] [libc++abi] Handle null pointer-to-object: Issue #64953

This addresses cases (currently failing) where we throw a null pointer-to-
object and is a proposed fix for
https://github.com/llvm/llvm-project/issues/64953

We are trying to satisfy the following bullet from the C++ ABI 15.3:

 *  the handler is of type cv1 T* cv2 and E is a pointer type that can be
    converted to the type of the handler by either or both of:

    o  a standard pointer conversion (4.10 [conv.ptr]) not involving
       conversions to private or protected or ambiguous classes.

    o  a qualification conversion.

The existing implementation assesses the ambiguity of bases by computing the
offsets to them; ambiguous cases are then when the same base appears at
different offsets.  The computation of the offset includes indirecting through
the vtables to find the offsets to virtual bases.

When the thrown pointer points to a real object, this is quite efficient since,
if the base is found, and it is not ambiguous and on a public path, the offset
is needed to return the adjusted pointer (and the indirections are not
particularly expensive to compute).

However, when we throw a null pointer-to-object, this scheme is no longer
applicable (and the code currently bypasses the relevant computations, leading
to the incorrect catches reported in the issue).

-----

The solution proposed here takes a composite approach:

1. When the pointer-to-object points to a real instance (well, at least, it is
   determined to be non-null), we use the existing scheme.

2. When the pointer-to-object is null:

 * We note that there is no real object.
 * When we are processing non-virtual bases, we continue to compute the offsets,
   but for a notional dummy object based at 0.  This is OK, since we never need
   to access the object content for non-virtual bases.
 * When we are processing a path with one or more virtual bases, we remember a
   cookie corresponding to the inner-most virtual base found so far (and set
   the notional offset to 0).  Offsets to inner non-virtual bases are then
   computed as normal.

A base is then ambiguous iff:
  * There is a recorded virtual base cookie and that is different from the
    current one or,
  * The non-virtual base offsets differ.

When a handler for a pointer succeeds in catching a base pointer for a thrown
null pointer-to-object, we still return a nullptr (so the adjustment to the
pointer is not required and need not be computed).

Since we noted that there was no object when starting the search for ambiguous
bases, we know that we can skip the pointer adjustment.

This was previously: Differential Revision: https://reviews.llvm.org/D158769
---
 libcxxabi/src/private_typeinfo.cpp            |  80 ++++++--
 libcxxabi/src/private_typeinfo.h              |  19 +-
 ...ch_null_pointer_to_object_pr64953.pass.cpp | 189 ++++++++++++++++++
 3 files changed, 266 insertions(+), 22 deletions(-)
 create mode 100644 libcxxabi/test/catch_null_pointer_to_object_pr64953.pass.cpp

diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp
index 82db4bbec1ada2e..49261c11570741f 100644
--- a/libcxxabi/src/private_typeinfo.cpp
+++ b/libcxxabi/src/private_typeinfo.cpp
@@ -42,6 +42,7 @@
 // is_equal() with use_strcmp=false so the string names are not compared.
 
 #include <cstdint>
+#include <cassert>
 #include <string.h>
 
 #ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@@ -167,7 +168,8 @@ const void* dyn_cast_to_derived(const void* static_ptr,
         src2dst_offset,
         0, 0, 0, 0, 0, 0, 0, 0,
         1,  // number_of_dst_type
-        false, false, false
+        false, false, false,
+        true, nullptr
     };
     // Do the  search
     dst_type->search_above_dst(&info, dynamic_ptr, dynamic_ptr, public_path, false);
@@ -192,7 +194,8 @@ const void* dyn_cast_to_derived(const void* static_ptr,
             static_ptr,
             static_type,
             src2dst_offset,
-            0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
+            0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false,
+            true, nullptr
         };
         info.number_of_dst_type = 1;
         dst_type->search_above_dst(&info, dynamic_ptr, dynamic_ptr, public_path, true);
@@ -239,7 +242,8 @@ const void* dyn_cast_try_downcast(const void* static_ptr,
         src2dst_offset,
         0, 0, 0, 0, 0, 0, 0, 0,
         1,  // number_of_dst_type
-        false, false, false
+        false, false, false,
+        true, nullptr
     };
     dynamic_type->search_above_dst(&dynamic_to_dst_info, dynamic_ptr, dynamic_ptr, public_path, false);
     if (dynamic_to_dst_info.path_dst_ptr_to_static_ptr != unknown) {
@@ -266,7 +270,8 @@ const void* dyn_cast_slow(const void* static_ptr,
         static_ptr,
         static_type,
         src2dst_offset,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
+        0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false,
+        true, nullptr
     };
 
     dynamic_type->search_below_dst(&info, dynamic_ptr, public_path, false);
@@ -292,7 +297,7 @@ const void* dyn_cast_slow(const void* static_ptr,
             static_ptr,
             static_type,
             src2dst_offset,
-            0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false
+            0, 0, 0, 0, 0, 0, 0, 0, 0, false, false, false, true, nullptr
         };
         dynamic_type->search_below_dst(&info, dynamic_ptr, public_path, true);
     }
@@ -481,7 +486,10 @@ __class_type_info::can_catch(const __shim_type_info* thrown_type,
     if (thrown_class_type == 0)
         return false;
     // bullet 2
-    __dynamic_cast_info info = {thrown_class_type, 0, this, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
+    assert (adjustedPtr && "catching a class without an object?");
+    __dynamic_cast_info info = {thrown_class_type, 0, this,
+                                -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                true, nullptr };
     info.number_of_dst_type = 1;
     thrown_class_type->has_unambiguous_public_base(&info, adjustedPtr, public_path);
     if (info.path_dst_ptr_to_static_ptr == public_path)
@@ -496,19 +504,38 @@ __class_type_info::can_catch(const __shim_type_info* thrown_type,
 #pragma clang diagnostic pop
 #endif
 
+// When we have an object to inspect - we just pass the pointer to the sub-
+// object that matched the static_type we just checked.  If that is different
+// from any previously recorded pointer to that object type, then we have
+// an ambiguous case.
+
+// When we have no object to inspect, we need to account for virtual bases
+// explicitly.
+// info->vbase_cookie is the pointer to the name of the innermost virtual base
+// type, or nullptr if there is no virtual base on the path so far.
+// adjustedPtr holds the notional offset of the subobject we just found.
+// If info->vbase_cookie differs from the previously recorded cookie (including
+// the case of nullptr representing an already-found static sub-object) then we
+// have an ambiguous case.  If the cookies agree but the offset (adjustedPtr)
+// differs from the previously recorded one, this indicates an ambiguous case
+// within the virtual base.
+
 void
 __class_type_info::process_found_base_class(__dynamic_cast_info* info,
                                                void* adjustedPtr,
                                                int path_below) const
 {
-    if (info->dst_ptr_leading_to_static_ptr == 0)
+    if (info->number_to_static_ptr == 0)
     {
-        // First time here
+        // First time we found this base
         info->dst_ptr_leading_to_static_ptr = adjustedPtr;
         info->path_dst_ptr_to_static_ptr = path_below;
+        // stash the virtual base cookie.
+        info->dst_ptr_not_leading_to_static_ptr = info->vbase_cookie;
         info->number_to_static_ptr = 1;
     }
-    else if (info->dst_ptr_leading_to_static_ptr == adjustedPtr)
+    else if (info->dst_ptr_not_leading_to_static_ptr == info->vbase_cookie &&
+             info->dst_ptr_leading_to_static_ptr == adjustedPtr)
     {
         // We've been here before.  Update path to "most public"
         if (info->path_dst_ptr_to_static_ptr == not_public_path)
@@ -517,7 +544,7 @@ __class_type_info::process_found_base_class(__dynamic_cast_info* info,
     else
     {
         // We've detected an ambiguous cast from (thrown_class_type, adjustedPtr)
-        //   to a static_type
+        // to a static_type
         info->number_to_static_ptr += 1;
         info->path_dst_ptr_to_static_ptr = not_public_path;
         info->search_done = true;
@@ -549,15 +576,31 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info,
                                                     void* adjustedPtr,
                                                     int path_below) const
 {
+    bool is_virtual = __offset_flags & __virtual_mask;
     ptrdiff_t offset_to_base = 0;
-    if (adjustedPtr != nullptr)
+    if (info->have_object)
     {
+        /* We have an object to inspect, we can look through its vtables to
+           find the layout.  */
         offset_to_base = __offset_flags >> __offset_shift;
-        if (__offset_flags & __virtual_mask)
+        if (is_virtual)
         {
             const char* vtable = *static_cast<const char*const*>(adjustedPtr);
             offset_to_base = update_offset_to_base(vtable, offset_to_base);
         }
+    } else if (! is_virtual) {
+        /* We have no object - so we cannot use it for determining layout when
+           we have a virtual base (since we cannot indirect through the vtable
+           to find the actual object offset).  However, for non-virtual bases,
+           we can pretend to have an object based at '0' */
+        offset_to_base = __offset_flags >> __offset_shift;
+    } else {
+      // No object to inspect, and the next base is virtual.
+      // We want to update info->vbase_cookie to the new innermost virtual base,
+      // using the pointer to the typeinfo name as a key.
+      info->vbase_cookie = static_cast<const void*>(__base_type->name ());
+      // .. and reset the pointer.
+      adjustedPtr = nullptr;
     }
     __base_type->has_unambiguous_public_base(
             info,
@@ -679,13 +722,22 @@ __pointer_type_info::can_catch(const __shim_type_info* thrown_type,
         dynamic_cast<const __class_type_info*>(thrown_pointer_type->__pointee);
     if (thrown_class_type == 0)
         return false;
-    __dynamic_cast_info info = {thrown_class_type, 0, catch_class_type, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
+    bool have_object = adjustedPtr != nullptr;
+    __dynamic_cast_info info = {thrown_class_type, 0, catch_class_type, -1,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			        have_object, nullptr };
     info.number_of_dst_type = 1;
     thrown_class_type->has_unambiguous_public_base(&info, adjustedPtr, public_path);
     if (info.path_dst_ptr_to_static_ptr == public_path)
     {
-        if (adjustedPtr != NULL)
+        // In the case of a thrown null pointer, we have no object but we might
+        // well have computed the offset to where a public sub-object would be.
+        // However, we do not want to return that offset to the user; we still
+        // want them to catch a null ptr.
+        if (have_object)
             adjustedPtr = const_cast<void*>(info.dst_ptr_leading_to_static_ptr);
+        else
+            adjustedPtr = nullptr;
         return true;
     }
     return false;
diff --git a/libcxxabi/src/private_typeinfo.h b/libcxxabi/src/private_typeinfo.h
index 622e09cc24217f3..33449c42c67dcce 100644
--- a/libcxxabi/src/private_typeinfo.h
+++ b/libcxxabi/src/private_typeinfo.h
@@ -110,6 +110,13 @@ struct _LIBCXXABI_HIDDEN __dynamic_cast_info
     bool found_any_static_type;
     // Set whenever a search can be stopped
     bool search_done;
+
+    // Data that modifies the search mechanism.
+
+    // Whether there is an object (false when we throw a null pointer to object).
+    bool have_object;
+    // Cookie for the innermost virtual base seen so far (used when !have_object).
+    const void* vbase_cookie;
 };
 
 // Has no base class
@@ -122,8 +129,7 @@ class _LIBCXXABI_TYPE_VIS __class_type_info : public __shim_type_info {
                                                        const void *, int) const;
   _LIBCXXABI_HIDDEN void process_static_type_below_dst(__dynamic_cast_info *,
                                                        const void *, int) const;
-  _LIBCXXABI_HIDDEN void process_found_base_class(__dynamic_cast_info *, void *,
-                                                  int) const;
+  _LIBCXXABI_HIDDEN void process_found_base_class(__dynamic_cast_info*, void*, int) const;
   _LIBCXXABI_HIDDEN virtual void search_above_dst(__dynamic_cast_info *,
                                                   const void *, const void *,
                                                   int, bool) const;
@@ -131,8 +137,7 @@ class _LIBCXXABI_TYPE_VIS __class_type_info : public __shim_type_info {
   search_below_dst(__dynamic_cast_info *, const void *, int, bool) const;
   _LIBCXXABI_HIDDEN virtual bool can_catch(const __shim_type_info *,
                                            void *&) const;
-  _LIBCXXABI_HIDDEN virtual void
-  has_unambiguous_public_base(__dynamic_cast_info *, void *, int) const;
+  _LIBCXXABI_HIDDEN virtual void has_unambiguous_public_base(__dynamic_cast_info*, void*, int) const;
 };
 
 // Has one non-virtual public base class at offset zero
@@ -147,8 +152,7 @@ class _LIBCXXABI_TYPE_VIS __si_class_type_info : public __class_type_info {
                                                   int, bool) const;
   _LIBCXXABI_HIDDEN virtual void
   search_below_dst(__dynamic_cast_info *, const void *, int, bool) const;
-  _LIBCXXABI_HIDDEN virtual void
-  has_unambiguous_public_base(__dynamic_cast_info *, void *, int) const;
+  _LIBCXXABI_HIDDEN virtual void has_unambiguous_public_base(__dynamic_cast_info*, void*, int) const;
 };
 
 struct _LIBCXXABI_HIDDEN __base_class_type_info
@@ -190,8 +194,7 @@ class _LIBCXXABI_TYPE_VIS __vmi_class_type_info : public __class_type_info {
                                                   int, bool) const;
   _LIBCXXABI_HIDDEN virtual void
   search_below_dst(__dynamic_cast_info *, const void *, int, bool) const;
-  _LIBCXXABI_HIDDEN virtual void
-  has_unambiguous_public_base(__dynamic_cast_info *, void *, int) const;
+  _LIBCXXABI_HIDDEN virtual void has_unambiguous_public_base(__dynamic_cast_info*, void*, int) const;
 };
 
 class _LIBCXXABI_TYPE_VIS __pbase_type_info : public __shim_type_info {
diff --git a/libcxxabi/test/catch_null_pointer_to_object_pr64953.pass.cpp b/libcxxabi/test/catch_null_pointer_to_object_pr64953.pass.cpp
new file mode 100644
index 000000000000000..9cbebed78aa83a9
--- /dev/null
+++ b/libcxxabi/test/catch_null_pointer_to_object_pr64953.pass.cpp
@@ -0,0 +1,189 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This test case checks specifically the cases under bullet 3.3:
+//
+//  C++ ABI 15.3:
+//  A handler is a match for an exception object of type E if
+//     *  The handler is of type cv T or cv T& and E and T are the same type
+//        (ignoring the top-level cv-qualifiers), or
+//     *  the handler is of type cv T or cv T& and T is an unambiguous base
+//        class of E, or
+//  >  *  the handler is of type cv1 T* cv2 and E is a pointer type that can   <
+//  >     be converted to the type of the handler by either or both of         <
+//  >       o  a standard pointer conversion (4.10 [conv.ptr]) not involving   <
+//  >          conversions to private or protected or ambiguous classes        <
+//  >       o  a qualification conversion                                      <
+//     *  the handler is a pointer or pointer to member type and E is
+//        std::nullptr_t
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: no-exceptions
+// This test requires the fix to
+// https://github.com/llvm/llvm-project/issues/64953, which is in libc++abi.dylib.
+// The fix is not contained in older macOS system dylibs, so the test will fail
+// there.
+// XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}{{.*}}
+// XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{11|12|13|14}}{{.*}}
+
+#include <exception>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+
+struct Base {
+  int b;
+};
+struct Base2 {
+  int b;
+};
+struct Derived1 : Base {
+  int b;
+};
+struct Derived2 : Base {
+  int b;
+};
+struct Derived3 : Base2 {
+  int b;
+};
+struct Private : private Base {
+  int b;
+};
+struct Protected : protected Base {
+  int b;
+};
+struct Virtual1 : virtual Base {
+  int b;
+};
+struct Virtual2 : virtual Base {
+  int b;
+};
+
+struct Ambiguous1 : Derived1, Derived2 {
+  int b;
+};
+struct Ambiguous2 : Derived1, Private {
+  int b;
+};
+struct Ambiguous3 : Derived1, Protected {
+  int b;
+};
+
+struct NoPublic1 : Private, Base2 {
+  int b;
+};
+struct NoPublic2 : Protected, Base2 {
+  int b;
+};
+
+struct Catchable1 : Derived3, Derived1 {
+  int b;
+};
+struct Catchable2 : Virtual1, Virtual2 {
+  int b;
+};
+struct Catchable3 : virtual Base, Virtual2 {
+  int b;
+};
+
+// Check that, when we throw a null pointer-to-object, we catch a nullptr.
+template <typename T // Handler type
+          ,
+          typename E // Thrown exception type
+          >
+void assert_catches() {
+  try {
+    throw static_cast<E>(0);
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Statements after throw must be unreachable");
+  } catch (T t) {
+    assert(t == nullptr);
+    return;
+  } catch (...) {
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Should not have entered catch-all");
+  }
+
+  printf("%s\n", __PRETTY_FUNCTION__);
+  assert(false && "The catch should have returned");
+}
+
+template <typename T // Handler type
+          ,
+          typename E // Thrown exception type
+          >
+void assert_cannot_catch() {
+  try {
+    throw static_cast<E>(0);
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Statements after throw must be unreachable");
+  } catch (T t) {
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Should not have entered the catch");
+  } catch (...) {
+    assert(true);
+    return;
+  }
+
+  printf("%s\n", __PRETTY_FUNCTION__);
+  assert(false && "The catch-all should have returned");
+}
+
+// Check that when we have a pointer-to-actual-object we, in fact, get the
+// adjusted pointer to the base class.
+template <typename T // Handler type
+          ,
+          typename O // Object type
+          >
+void assert_catches_bp() {
+  O* o = new (O);
+  try {
+    throw o;
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Statements after throw must be unreachable");
+  } catch (T t) {
+    assert(t == static_cast<T>(o));
+    //__builtin_printf("o = %p t = %p\n", o, t);
+    delete o;
+    return;
+  } catch (...) {
+    printf("%s\n", __PRETTY_FUNCTION__);
+    assert(false && "Should not have entered catch-all");
+  }
+
+  printf("%s\n", __PRETTY_FUNCTION__);
+  assert(false && "The catch should have returned");
+}
+
+void f1() {
+  assert_catches<Base*, Catchable1*>();
+  assert_catches<Base*, Catchable2*>();
+  assert_catches<Base*, Catchable3*>();
+}
+
+void f2() {
+  assert_cannot_catch<Base*, Ambiguous1*>();
+  assert_cannot_catch<Base*, Ambiguous2*>();
+  assert_cannot_catch<Base*, Ambiguous3*>();
+  assert_cannot_catch<Base*, NoPublic1*>();
+  assert_cannot_catch<Base*, NoPublic2*>();
+}
+
+void f3() {
+  assert_catches_bp<Base*, Catchable1>();
+  assert_catches_bp<Base*, Catchable2>();
+  assert_catches_bp<Base*, Catchable3>();
+}
+
+int main(int, char**) {
+  f1();
+  f2();
+  f3();
+  return 0;
+}



More information about the libcxx-commits mailing list