[libcxx-commits] [libcxx] [libc++][format] Discard contents since null-terminator in character arrays in formatting (PR #116571)

A. Jiang via libcxx-commits libcxx-commits at lists.llvm.org
Sun Dec 22 08:37:49 PST 2024


https://github.com/frederick-vs-ja updated https://github.com/llvm/llvm-project/pull/116571

>From 96f2f4c322ea679520c24e76b0e1ff0f0368ac63 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Mon, 18 Nov 2024 10:04:33 +0800
Subject: [PATCH 1/4] [libc++][format] Decay character arrays in formatting

Currently, when formatting, built-in `char`/`wchar_t` arrays are assumed
to be null-terminated sequences whose terminator is the last element.
This doesn't conform to [format.arg]/6.9.

> otherwise, if `decay_t<TD>` is `char_type*` or `const char_type*`,
> initializes value with `static_cast<const char_type*>(v)`;

The standard wording specifies that character arrays are decayed to
pointers, so the formatted contents end at the first null terminator.
When the null terminator is not the last element or there's no null
terminator (the latter case is UB), libc++ currently produces results
that differ from this pointer-based behavior.
---
 libcxx/include/__format/format_arg_store.h    | 23 ++++---------------
 .../format/format.functions/format_tests.h    |  4 ++++
 2 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h
index 8b2c95c657c9bd..78b142ebcdb743 100644
--- a/libcxx/include/__format/format_arg_store.h
+++ b/libcxx/include/__format/format_arg_store.h
@@ -101,20 +101,14 @@ consteval __arg_t __determine_arg_t() {
   return __arg_t::__long_double;
 }
 
-// Char pointer
+// Char pointer or array
 template <class _Context, class _Tp>
-  requires(same_as<typename _Context::char_type*, _Tp> || same_as<const typename _Context::char_type*, _Tp>)
+  requires(same_as<typename _Context::char_type*, _Tp> || same_as<const typename _Context::char_type*, _Tp>) ||
+          (is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>)
 consteval __arg_t __determine_arg_t() {
   return __arg_t::__const_char_type_ptr;
 }
 
-// Char array
-template <class _Context, class _Tp>
-  requires(is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>)
-consteval __arg_t __determine_arg_t() {
-  return __arg_t::__string_view;
-}
-
 // String view
 template <class _Context, class _Tp>
   requires(same_as<typename _Context::char_type, typename _Tp::value_type> &&
@@ -188,15 +182,8 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
   else if constexpr (__arg == __arg_t::__unsigned_long_long)
     return basic_format_arg<_Context>{__arg, static_cast<unsigned long long>(__value)};
   else if constexpr (__arg == __arg_t::__string_view)
-    // Using std::size on a character array will add the NUL-terminator to the size.
-    if constexpr (is_array_v<_Dp>)
-      return basic_format_arg<_Context>{
-          __arg, basic_string_view<typename _Context::char_type>{__value, extent_v<_Dp> - 1}};
-    else
-      // When the _Traits or _Allocator are different an implicit conversion will
-      // fail.
-      return basic_format_arg<_Context>{
-          __arg, basic_string_view<typename _Context::char_type>{__value.data(), __value.size()}};
+    return basic_format_arg<_Context>{
+        __arg, basic_string_view<typename _Context::char_type>{__value.data(), __value.size()}};
   else if constexpr (__arg == __arg_t::__ptr)
     return basic_format_arg<_Context>{__arg, static_cast<const void*>(__value)};
   else if constexpr (__arg == __arg_t::__handle)
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index b2ed6775fe8a13..33212fa663e4a5 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -3189,6 +3189,10 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
     const CharT* data = buffer;
     check(SV("hello 09azAZ!"), SV("hello {}"), data);
   }
+  {
+    CharT buffer[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f'), 0};
+    check(SV("hello abc"), SV("hello {}"), buffer);
+  }
   {
     std::basic_string<CharT> data = STR("world");
     check(SV("hello world"), SV("hello {}"), data);

>From e051073552c2f02552597d6ed1c3b0c717e9d234 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Wed, 27 Nov 2024 14:36:40 +0800
Subject: [PATCH 2/4] Address review comments

---
 libcxx/include/__format/format_arg_store.h    | 35 +++++++++++++++----
 .../format/format.functions/format_tests.h    |  2 ++
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h
index 78b142ebcdb743..f514ea4def8ddf 100644
--- a/libcxx/include/__format/format_arg_store.h
+++ b/libcxx/include/__format/format_arg_store.h
@@ -17,6 +17,7 @@
 #include <__concepts/arithmetic.h>
 #include <__concepts/same_as.h>
 #include <__config>
+#include <__cstddef/size_t.h>
 #include <__format/concepts.h>
 #include <__format/format_arg.h>
 #include <__type_traits/conditional.h>
@@ -32,6 +33,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 namespace __format {
 
+template <class _Arr, class _Elem>
+inline constexpr bool __is_bounded_array_of = false;
+
+template <class _Elem, size_t _Len>
+inline constexpr bool __is_bounded_array_of<_Elem[_Len], _Elem> = true;
+
 /// \returns The @c __arg_t based on the type of the formatting argument.
 ///
 /// \pre \c __formattable<_Tp, typename _Context::char_type>
@@ -101,14 +108,20 @@ consteval __arg_t __determine_arg_t() {
   return __arg_t::__long_double;
 }
 
-// Char pointer or array
+// Char pointer
 template <class _Context, class _Tp>
-  requires(same_as<typename _Context::char_type*, _Tp> || same_as<const typename _Context::char_type*, _Tp>) ||
-          (is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>)
+  requires(same_as<typename _Context::char_type*, _Tp> || same_as<const typename _Context::char_type*, _Tp>)
 consteval __arg_t __determine_arg_t() {
   return __arg_t::__const_char_type_ptr;
 }
 
+// Char array
+template <class _Context, class _Tp>
+  requires __is_bounded_array_of<_Tp, typename _Context::char_type>
+consteval __arg_t __determine_arg_t() {
+  return __arg_t::__string_view;
+}
+
 // String view
 template <class _Context, class _Tp>
   requires(same_as<typename _Context::char_type, typename _Tp::value_type> &&
@@ -162,13 +175,14 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
   static_assert(__arg != __arg_t::__none, "the supplied type is not formattable");
   static_assert(__formattable_with<_Tp, _Context>);
 
+  using __context_char_type = _Context::char_type;
   // Not all types can be used to directly initialize the
   // __basic_format_arg_value.  First handle all types needing adjustment, the
   // final else requires no adjustment.
   if constexpr (__arg == __arg_t::__char_type)
 
 #  if _LIBCPP_HAS_WIDE_CHARACTERS
-    if constexpr (same_as<typename _Context::char_type, wchar_t> && same_as<_Dp, char>)
+    if constexpr (same_as<__context_char_type, wchar_t> && same_as<_Dp, char>)
       return basic_format_arg<_Context>{__arg, static_cast<wchar_t>(static_cast<unsigned char>(__value))};
     else
 #  endif
@@ -182,8 +196,17 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
   else if constexpr (__arg == __arg_t::__unsigned_long_long)
     return basic_format_arg<_Context>{__arg, static_cast<unsigned long long>(__value)};
   else if constexpr (__arg == __arg_t::__string_view)
-    return basic_format_arg<_Context>{
-        __arg, basic_string_view<typename _Context::char_type>{__value.data(), __value.size()}};
+    // Using std::size on a character array will add the NUL-terminator to the size.
+    if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) {
+      if (const auto __pzero = char_traits<__context_char_type>::find(__value, extent_v<_Dp>, __context_char_type{}))
+        return basic_format_arg<_Context>{
+            __arg, basic_string_view<__context_char_type>{__value, static_cast<size_t>(__pzero - __value)}};
+      else
+        // The behavior is undefined in this case.
+        return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value, extent_v<_Dp>}};
+    } else
+      // When the _Traits or _Allocator are different an implicit conversion will fail.
+      return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}};
   else if constexpr (__arg == __arg_t::__ptr)
     return basic_format_arg<_Context>{__arg, static_cast<const void*>(__value)};
   else if constexpr (__arg == __arg_t::__handle)
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 33212fa663e4a5..424205d0733044 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -3190,6 +3190,8 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
     check(SV("hello 09azAZ!"), SV("hello {}"), data);
   }
   {
+    // https://github.com/llvm/llvm-project/issues/115935
+    // Contents after the embedded null character are discarded.
     CharT buffer[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f'), 0};
     check(SV("hello abc"), SV("hello {}"), buffer);
   }

>From 2c10c7784c26362f35550fdac23824d197a0ba9b Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Mon, 23 Dec 2024 00:10:41 +0800
Subject: [PATCH 3/4] Address review comments

---
 libcxx/include/__format/format_arg_store.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h
index f514ea4def8ddf..a4144faa63b6ed 100644
--- a/libcxx/include/__format/format_arg_store.h
+++ b/libcxx/include/__format/format_arg_store.h
@@ -198,12 +198,14 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
   else if constexpr (__arg == __arg_t::__string_view)
     // Using std::size on a character array will add the NUL-terminator to the size.
     if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) {
-      if (const auto __pzero = char_traits<__context_char_type>::find(__value, extent_v<_Dp>, __context_char_type{}))
+      const auto __pbegin = std::begin(__value);
+      if (const _Dp* __pzero = char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{})) {
         return basic_format_arg<_Context>{
-            __arg, basic_string_view<__context_char_type>{__value, static_cast<size_t>(__pzero - __value)}};
-      else
-        // The behavior is undefined in this case.
-        return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value, extent_v<_Dp>}};
+            __arg, basic_string_view<__context_char_type>{__pbegin, static_cast<size_t>(__pzero - __pbegin)}};
+      } else {
+        // Per [format.arg]/5, the behavior is undefined because the array is not null-terminated.
+        return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__pbegin, extent_v<_Dp>}};
+      }
     } else
       // When the _Traits or _Allocator are different an implicit conversion will fail.
       return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}};

>From f7777e8c958ec78370a4de919d35f9b782f9f9e8 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Mon, 23 Dec 2024 00:35:31 +0800
Subject: [PATCH 4/4] Fix type of `__pzero`

---
 libcxx/include/__format/format_arg_store.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h
index a4144faa63b6ed..1ab7c385d70393 100644
--- a/libcxx/include/__format/format_arg_store.h
+++ b/libcxx/include/__format/format_arg_store.h
@@ -199,7 +199,8 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
     // Using std::size on a character array will add the NUL-terminator to the size.
     if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) {
       const auto __pbegin = std::begin(__value);
-      if (const _Dp* __pzero = char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{})) {
+      if (const __context_char_type* const __pzero =
+              char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{})) {
         return basic_format_arg<_Context>{
             __arg, basic_string_view<__context_char_type>{__pbegin, static_cast<size_t>(__pzero - __pbegin)}};
       } else {



More information about the libcxx-commits mailing list