[libcxx-commits] [libcxx] [libc++] Protect the libc++ implementation from CUDA SDK's `__noinline__` macro (PR #73838)

Dmitri Gribenko via libcxx-commits libcxx-commits at lists.llvm.org
Fri Dec 1 09:40:15 PST 2023


gribozavr wrote:

@philnik777 Thank you for the idea!

> I think we can fix this all by simply using `[[__gnu__::__noinline__]]` instead.

So you're suggesting that we change the definition of `_LIBCPP_NOINLINE`:

```
#define _LIBCPP_NOINLINE [[__gnu__::__noinline__]]
```

Thus, it would expand to either `[[__gnu__::__noinline__]]` under normal circumstances, or to `[[__gnu__::__attribute__((noinline))]]` under CUDA?

That would work if `_LIBCPP_NOINLINE` was the only usage of `__noinline__`. We also have the `#if __has_attribute(__noinline__)` that gets broken.

(I also have a concern that there are no tests in Clang for such syntax, so I suspect the fact that it works is accidental - but even if it is intentional, the `__has_attribute()` expression does not work.)

Here is a patch if you want to try it yourself:

<details>
<summary>A patch that changes <tt>_LIBCPP_NOINLINE</tt> to <tt>[[__gnu__::__noinline__]]</tt></summary>

```
diff --git a/libcxx/include/__config b/libcxx/include/__config
index ee77305162f7..e078a7ea3ba1 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1176,7 +1176,7 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c
 #  endif

 #  if __has_attribute(__noinline__)
-#    define _LIBCPP_NOINLINE __attribute__((__noinline__))
+#    define _LIBCPP_NOINLINE [[__gnu__::__noinline__]]
 #  else
 #    define _LIBCPP_NOINLINE
 #  endif
diff --git a/libcxx/include/string b/libcxx/include/string
index 25f307825fa2..8bce9af76135 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1901,7 +1901,7 @@ private:
     // to call the __init() functions as those are marked as inline which may
     // result in over-aggressive inlining by the compiler, where our aim is
     // to only inline the fast path code directly in the ctor.
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void __init_copy_ctor_external(const value_type* __s, size_type __sz);
+    _LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init_copy_ctor_external(const value_type* __s, size_type __sz);

     template <class _InputIterator, __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
     inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init(_InputIterator __first, _InputIterator __last);
@@ -1935,7 +1935,7 @@ private:
     // have proof that the input does not alias the current instance.
     // For example, operator=(basic_string) performs a 'self' check.
     template <bool __is_short>
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_no_alias(const value_type* __s, size_type __n);
+    _LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_no_alias(const value_type* __s, size_type __n);

   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_to_end(size_type __pos) {
     __null_terminate_at(std::__to_address(__get_pointer()), __pos);
@@ -1943,7 +1943,7 @@ private:

     // __erase_external_with_move is invoked for erase() invocations where
     // `n ~= npos`, likely requiring memory moves on the string data.
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void __erase_external_with_move(size_type __pos, size_type __n);
+    _LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_external_with_move(size_type __pos, size_type __n);

     _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
     void __copy_assign_alloc(const basic_string& __str)
@@ -2015,8 +2015,8 @@ private:
         _NOEXCEPT
         {}

-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_external(const value_type* __s);
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_external(const value_type* __s, size_type __n);
+    _LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_external(const value_type* __s);
+    _LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_external(const value_type* __s, size_type __n);

     // Assigns the value in __s, guaranteed to be __n < __min_cap in length.
     inline _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_short(const value_type* __s, size_type __n) {
@@ -2169,7 +2169,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
 }

 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
+_LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20
 void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
     const value_type* __s, size_type __sz) {
   if (__libcpp_is_constant_evaluated())
@@ -2398,7 +2398,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by_without_replace(

 template <class _CharT, class _Traits, class _Allocator>
 template <bool __is_short>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
+_LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
     const value_type* __s, size_type __n) {
@@ -2416,7 +2416,7 @@ basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
 }

 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
+_LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_external(
     const value_type* __s, size_type __n) {
@@ -2627,7 +2627,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp& __t, size_type __po
 }

 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
+_LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_external(const value_type* __s) {
   return __assign_external(__s, traits_type::length(__s));
@@ -3112,7 +3112,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
 // 'externally instantiated' erase() implementation, called when __n != npos.
 // Does not check __pos against size()
 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
+_LIBCPP_NOINLINE _LIBCPP_CONSTEXPR_SINCE_CXX20
 void
 basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(
     size_type __pos, size_type __n)
diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py
index 8c4be97897f6..de68e0d666bd 100644
--- a/libcxx/test/libcxx/system_reserved_names.gen.py
+++ b/libcxx/test/libcxx/system_reserved_names.gen.py
@@ -158,5 +158,8 @@ for header in public_headers:
 #define erase SYSTEM_RESERVED_NAME
 #define refresh SYSTEM_RESERVED_NAME

+// Macros from the CUDA SDKs
+#define __noinline__ SYSTEM_RESERVED_NAME
+
 #include <{header}>
 """)
```
</details>



https://github.com/llvm/llvm-project/pull/73838


More information about the libcxx-commits mailing list