[libcxx-commits] [libcxx] ab60910 - [libc++][format] Discard contents since null-terminator in character arrays in formatting (#116571)
via libcxx-commits
libcxx-commits at lists.llvm.org
Mon May 12 13:49:28 PDT 2025
Author: A. Jiang
Date: 2025-05-12T16:49:25-04:00
New Revision: ab60910e01eaa8bcf993cdb59a95f882b3859fd1
URL: https://github.com/llvm/llvm-project/commit/ab60910e01eaa8bcf993cdb59a95f882b3859fd1
DIFF: https://github.com/llvm/llvm-project/commit/ab60910e01eaa8bcf993cdb59a95f882b3859fd1.diff
LOG: [libc++][format] Discard contents since null-terminator in character arrays in formatting (#116571)
Currently, built-in `char`/`wchar_t` arrays are assumed to be
null-terminated sequence with the terminator being the last element in
formatting. This doesn't conform to [format.arg]/6.9.
> otherwise, if `decay_t<TD>` is `char_type*` or `const char_type*`,
> initializes value with `static_cast<const char_type*>(v)`;
The standard wording specifies that character arrays are decayed to
pointers. When the null terminator is not the last element or there's no
null terminator (the latter case is UB), libc++ currently produces
different results.
Also fixes and hardens `formatter<CharT[N], CharT>::format` in
`<__format/formatter_string.h>`. These specializations are rarely used.
Fixes #115935. Also checks the preconditions in this case, which fixes
#116570.
Added:
libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp
Modified:
libcxx/include/__format/format_arg_store.h
libcxx/include/__format/formatter_string.h
libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
libcxx/test/std/utilities/format/format.functions/format_tests.h
Removed:
################################################################################
diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h
index dba2dfd6df084..87557aa4da7bb 100644
--- a/libcxx/include/__format/format_arg_store.h
+++ b/libcxx/include/__format/format_arg_store.h
@@ -17,6 +17,7 @@
#include <__concepts/arithmetic.h>
#include <__concepts/same_as.h>
#include <__config>
+#include <__cstddef/size_t.h>
#include <__format/concepts.h>
#include <__format/format_arg.h>
#include <__type_traits/conditional.h>
@@ -32,6 +33,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD
namespace __format {
+template <class _Arr, class _Elem>
+inline constexpr bool __is_bounded_array_of = false;
+
+template <class _Elem, size_t _Len>
+inline constexpr bool __is_bounded_array_of<_Elem[_Len], _Elem> = true;
+
/// \returns The @c __arg_t based on the type of the formatting argument.
///
/// \pre \c __formattable<_Tp, typename _Context::char_type>
@@ -110,7 +117,7 @@ consteval __arg_t __determine_arg_t() {
// Char array
template <class _Context, class _Tp>
- requires(is_array_v<_Tp> && same_as<_Tp, typename _Context::char_type[extent_v<_Tp>]>)
+ requires __is_bounded_array_of<_Tp, typename _Context::char_type>
consteval __arg_t __determine_arg_t() {
return __arg_t::__string_view;
}
@@ -168,13 +175,14 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
static_assert(__arg != __arg_t::__none, "the supplied type is not formattable");
static_assert(__formattable_with<_Tp, _Context>);
+ using __context_char_type = _Context::char_type;
// Not all types can be used to directly initialize the
// __basic_format_arg_value. First handle all types needing adjustment, the
// final else requires no adjustment.
if constexpr (__arg == __arg_t::__char_type)
# if _LIBCPP_HAS_WIDE_CHARACTERS
- if constexpr (same_as<typename _Context::char_type, wchar_t> && same_as<_Dp, char>)
+ if constexpr (same_as<__context_char_type, wchar_t> && same_as<_Dp, char>)
return basic_format_arg<_Context>{__arg, static_cast<wchar_t>(static_cast<unsigned char>(__value))};
else
# endif
@@ -189,14 +197,16 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
return basic_format_arg<_Context>{__arg, static_cast<unsigned long long>(__value)};
else if constexpr (__arg == __arg_t::__string_view)
// Using std::size on a character array will add the NUL-terminator to the size.
- if constexpr (is_array_v<_Dp>)
- return basic_format_arg<_Context>{
- __arg, basic_string_view<typename _Context::char_type>{__value, extent_v<_Dp> - 1}};
- else
- // When the _Traits or _Allocator are
diff erent an implicit conversion will
- // fail.
+ if constexpr (__is_bounded_array_of<_Dp, __context_char_type>) {
+ const __context_char_type* const __pbegin = std::begin(__value);
+ const __context_char_type* const __pzero =
+ char_traits<__context_char_type>::find(__pbegin, extent_v<_Dp>, __context_char_type{});
+ _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array");
return basic_format_arg<_Context>{
- __arg, basic_string_view<typename _Context::char_type>{__value.data(), __value.size()}};
+ __arg, basic_string_view<__context_char_type>{__pbegin, static_cast<size_t>(__pzero - __pbegin)}};
+ } else
+ // When the _Traits or _Allocator are
diff erent an implicit conversion will fail.
+ return basic_format_arg<_Context>{__arg, basic_string_view<__context_char_type>{__value.data(), __value.size()}};
else if constexpr (__arg == __arg_t::__ptr)
return basic_format_arg<_Context>{__arg, static_cast<const void*>(__value)};
else if constexpr (__arg == __arg_t::__handle)
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index d71d19a4970ab..bad6a4d2bb899 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -10,6 +10,7 @@
#ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H
#define _LIBCPP___FORMAT_FORMATTER_STRING_H
+#include <__assert>
#include <__config>
#include <__format/concepts.h>
#include <__format/format_parse_context.h>
@@ -17,6 +18,7 @@
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
#include <__format/write_escaped.h>
+#include <cstddef>
#include <string>
#include <string_view>
@@ -94,7 +96,9 @@ struct formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> {
template <class _FormatContext>
_LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
format(const _CharT (&__str)[_Size], _FormatContext& __ctx) const {
- return _Base::format(basic_string_view<_CharT>(__str, _Size), __ctx);
+ const _CharT* const __pzero = char_traits<_CharT>::find(__str, _Size, _CharT{});
+ _LIBCPP_ASSERT_VALID_INPUT_RANGE(__pzero != nullptr, "formatting a non-null-terminated array");
+ return _Base::format(basic_string_view<_CharT>(__str, static_cast<size_t>(__pzero - __str)), __ctx);
}
};
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp
new file mode 100644
index 0000000000000..1e9b1d93eb06f
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/assert.array.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <format>
+
+// Formatting non-null-terminated character arrays.
+
+// REQUIRES: std-at-least-c++20, has-unix-headers, libcpp-hardening-mode={{extensive|debug}}
+// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
+
+#include <format>
+
+#include "check_assertion.h"
+
+int main(int, char**) {
+ {
+ const char non_null_terminated[3]{'1', '2', '3'};
+ TEST_LIBCPP_ASSERT_FAILURE(std::format("{}", non_null_terminated), "formatting a non-null-terminated array");
+ }
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+ {
+ const wchar_t non_null_terminated[3]{L'1', L'2', L'3'};
+ TEST_LIBCPP_ASSERT_FAILURE(std::format(L"{}", non_null_terminated), "formatting a non-null-terminated array");
+ }
+#endif
+
+ return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
index 1b3ff52d22d58..bc056db9e254e 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
@@ -41,13 +41,10 @@ struct Tester {
constexpr Tester(const char (&r)[N]) { __builtin_memcpy(text, r, N); }
char text[N];
- // The size of the array shouldn't include the NUL character.
- static const std::size_t size = N - 1;
-
template <class CharT>
void
test(const std::basic_string<CharT>& expected, const std::basic_string_view<CharT>& fmt, std::size_t offset) const {
- using Str = CharT[size];
+ using Str = CharT[N];
std::basic_format_parse_context<CharT> parse_ctx{fmt};
std::formatter<Str, CharT> formatter;
static_assert(std::semiregular<decltype(formatter)>);
@@ -57,16 +54,25 @@ struct Tester {
assert(std::to_address(it) == std::to_address(fmt.end()) - offset);
std::basic_string<CharT> result;
- auto out = std::back_inserter(result);
+ auto out = std::back_inserter(result);
using FormatCtxT = std::basic_format_context<decltype(out), CharT>;
- std::basic_string<CharT> buffer{text, text + N};
- // Note not too found of this hack
- Str* data = reinterpret_cast<Str*>(const_cast<CharT*>(buffer.c_str()));
-
- FormatCtxT format_ctx =
- test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(*data));
- formatter.format(*data, format_ctx);
+ if constexpr (std::is_same_v<CharT, char>) {
+ FormatCtxT format_ctx =
+ test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(text));
+ formatter.format(text, format_ctx);
+ }
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+ else {
+ Str buffer;
+ for (std::size_t i = 0; i != N; ++i) {
+ buffer[i] = static_cast<CharT>(text[i]);
+ }
+ FormatCtxT format_ctx =
+ test_format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(buffer));
+ formatter.format(buffer, format_ctx);
+ }
+#endif
assert(result == expected);
}
@@ -118,8 +124,8 @@ template <class CharT>
void test_array() {
test_helper_wrapper<" azAZ09,./<>?">(STR(" azAZ09,./<>?"), STR("}"));
- std::basic_string<CharT> s(CSTR("abc\0abc"), 7);
- test_helper_wrapper<"abc\0abc">(s, STR("}"));
+ // Contents after embedded null terminator are not formatted.
+ test_helper_wrapper<"abc\0abc">(STR("abc"), STR("}"));
test_helper_wrapper<"world">(STR("world"), STR("}"));
test_helper_wrapper<"world">(STR("world"), STR("_>}"));
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 3969b341cb146..60abd4ac4e225 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -3189,6 +3189,15 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
const CharT* data = buffer;
check(SV("hello 09azAZ!"), SV("hello {}"), data);
}
+ {
+ // https://github.com/llvm/llvm-project/issues/115935
+ // Contents after the embedded null character are discarded.
+ CharT buffer[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f'), 0};
+ check(SV("hello abc"), SV("hello {}"), buffer);
+ // Even when the last element of the array is not null character.
+ CharT buffer2[] = {CharT('a'), CharT('b'), CharT('c'), 0, CharT('d'), CharT('e'), CharT('f')};
+ check(SV("hello abc"), SV("hello {}"), buffer2);
+ }
{
std::basic_string<CharT> data = STR("world");
check(SV("hello world"), SV("hello {}"), data);
More information about the libcxx-commits
mailing list