[llvm] [libcxx] [clang] [clang-tools-extra] Mark some std::string functions noinline. (PR #72869)

James Y Knight via cfe-commits cfe-commits at lists.llvm.org
Mon Nov 27 14:11:08 PST 2023


https://github.com/jyknight updated https://github.com/llvm/llvm-project/pull/72869

>From 7c51f0cb27079c79f924ff746dccb14637641fe4 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 20 Nov 2023 13:43:32 +0000
Subject: [PATCH] Mark some std::string functions noinline.

The intent of these particular functions, since their introduction,
was to NOT be inlinable.

However, the mechanism by which this was accomplished was non-obvious,
and stopped working when string is compiled for C++20.

A longstanding behavior specified by the C++ standard is that
instantiation of the body of a template function is suppressed by an
extern template declaration -- unless the function is explicitly
marked either constexpr or inline. Of course, if the body is not
instantiated, then it cannot possibly be inlined, and thus all the
functions listed in libcxx/include/__string/extern_template_lists.h
were uninlineable.

But, in C++20 mode, string functions were annotated constexpr, which
means they _are_ instantiated, and do become inlineable. And, in fact,
they do get inlined, which has caused noticeable binary-size growth
for users.

For example, in C++17,
`std::string f(std::string *in) { return *in; }`
does not inline the copy-constructor call, and instead generates a
call to the exported function defined in the libc++ shared library.

I think we probably don't want to mark all functions that are
currently in the extern template list as noinline, as many of them
really are reasonable inlining candidates. Thus, I've restricted this
change to only the few functions that were clearly intended to be
outlined.

See commits like b019c5c0372eb08800327efb5e7955ce918b75d1 (and some
others like it) for background, in which functions were removed from
the extern template list in the unstable ABI in order to allow the
short-string case to be inlined, while moving the long-string case to
a separate function, added to the extern template list.
---
 libcxx/include/__config |  6 ++++++
 libcxx/include/string   | 20 ++++++++++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index e8da358bb8d7cd5..267d58735e8aab3 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1279,6 +1279,12 @@ __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, c
 #    define _LIBCPP_NO_SANITIZE(...)
 #  endif
 
+#  if __has_attribute(__noinline__)
+#    define _LIBCPP_NOINLINE __attribute__((__noinline__))
+#  else
+#    define _LIBCPP_NOINLINE
+#  endif
+
 // We often repeat things just for handling wide characters in the library.
 // When wide characters are disabled, it can be useful to have a quick way of
 // disabling it without having to resort to #if-#endif, which has a larger
diff --git a/libcxx/include/string b/libcxx/include/string
index 9c2efac8006bd72..fa06ac316b2d16c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1899,7 +1899,7 @@ private:
     // to call the __init() functions as those are marked as inline which may
     // result in over-aggressive inlining by the compiler, where our aim is
     // to only inline the fast path code directly in the ctor.
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init_copy_ctor_external(const value_type* __s, size_type __sz);
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void __init_copy_ctor_external(const value_type* __s, size_type __sz);
 
     template <class _InputIterator, __enable_if_t<__has_exactly_input_iterator_category<_InputIterator>::value, int> = 0>
     inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void __init(_InputIterator __first, _InputIterator __last);
@@ -1933,7 +1933,7 @@ private:
     // have proof that the input does not alias the current instance.
     // For example, operator=(basic_string) performs a 'self' check.
     template <bool __is_short>
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_no_alias(const value_type* __s, size_type __n);
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_no_alias(const value_type* __s, size_type __n);
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_to_end(size_type __pos) {
     __null_terminate_at(std::__to_address(__get_pointer()), __pos);
@@ -1941,7 +1941,7 @@ private:
 
     // __erase_external_with_move is invoked for erase() invocations where
     // `n ~= npos`, likely requiring memory moves on the string data.
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_external_with_move(size_type __pos, size_type __n);
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE void __erase_external_with_move(size_type __pos, size_type __n);
 
     _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
     void __copy_assign_alloc(const basic_string& __str)
@@ -2013,8 +2013,8 @@ private:
         _NOEXCEPT
         {}
 
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_external(const value_type* __s);
-    _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_external(const value_type* __s, size_type __n);
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_external(const value_type* __s);
+    _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE basic_string& __assign_external(const value_type* __s, size_type __n);
 
     // Assigns the value in __s, guaranteed to be __n < __min_cap in length.
     inline _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_short(const value_type* __s, size_type __n) {
@@ -2167,7 +2167,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
 }
 
 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
 void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
     const value_type* __s, size_type __sz) {
   if (__libcpp_is_constant_evaluated())
@@ -2396,7 +2396,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by_without_replace(
 
 template <class _CharT, class _Traits, class _Allocator>
 template <bool __is_short>
-_LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
     const value_type* __s, size_type __n) {
@@ -2414,7 +2414,7 @@ basic_string<_CharT, _Traits, _Allocator>::__assign_no_alias(
 }
 
 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_external(
     const value_type* __s, size_type __n) {
@@ -2625,7 +2625,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp& __t, size_type __po
 }
 
 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::__assign_external(const value_type* __s) {
   return __assign_external(__s, traits_type::length(__s));
@@ -3110,7 +3110,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
 // 'externally instantiated' erase() implementation, called when __n != npos.
 // Does not check __pos against size()
 template <class _CharT, class _Traits, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_NOINLINE
 void
 basic_string<_CharT, _Traits, _Allocator>::__erase_external_with_move(
     size_type __pos, size_type __n)



More information about the cfe-commits mailing list