[libcxx-commits] [libcxx] [libc++] Fix `money_get::do_get` with huge input (PR #126273)

A. Jiang via libcxx-commits libcxx-commits at lists.llvm.org
Thu Feb 27 02:59:51 PST 2025


https://github.com/frederick-vs-ja updated https://github.com/llvm/llvm-project/pull/126273

>From 61775dc1fc7ff13f79cb573eec5cd5f7b8800081 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Tue, 25 Feb 2025 19:33:55 +0800
Subject: [PATCH 1/2] [libc++] Fix money_get::do_get with huge input

`money_get::do_get` needs to be fixed to handle extremely long input
(e.g. more than 100 digits).
1. `__double_or_nothing` needs to copy the contents of the stack buffer
on the initial allocation.
2. The `sscanf` call in `do_get` needs to scan the dynamic buffer if
dynamic allocation happens.

The fix should be backported to the frozen cxx03 headers, as the previous
incorrect handling caused core language undefined behavior (UB).
---
 libcxx/include/__cxx03/locale                 |  10 +-
 libcxx/include/locale                         |  10 +-
 .../get_long_double_overlong.pass.cpp         | 113 ++++++++++++++++++
 3 files changed, 127 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp

diff --git a/libcxx/include/__cxx03/locale b/libcxx/include/__cxx03/locale
index 6360bbc2f6b60..a83ebacb5adbe 100644
--- a/libcxx/include/__cxx03/locale
+++ b/libcxx/include/__cxx03/locale
@@ -2460,6 +2460,8 @@ _LIBCPP_HIDE_FROM_ABI void __double_or_nothing(unique_ptr<_Tp, void (*)(void*)>&
     __throw_bad_alloc();
   if (__owns)
     __b.release();
+  else
+    std::memcpy(__t, __b.get(), __cur_cap);
   __b = unique_ptr<_Tp, void (*)(void*)>(__t, free);
   __new_cap /= sizeof(_Tp);
   __n = __b.get() + __n_off;
@@ -2655,20 +2657,22 @@ _InputIterator money_get<_CharT, _InputIterator>::do_get(
     char_type __atoms[sizeof(__src) - 1];
     __ct.widen(__src, __src + (sizeof(__src) - 1), __atoms);
     char __nbuf[__bz];
-    char* __nc = __nbuf;
+    char* __nc          = __nbuf;
+    const char* __nc_in = __nc;
     unique_ptr<char, void (*)(void*)> __h(nullptr, free);
     if (__wn - __wb.get() > __bz - 2) {
       __h.reset((char*)malloc(static_cast<size_t>(__wn - __wb.get() + 2)));
       if (__h.get() == nullptr)
         __throw_bad_alloc();
-      __nc = __h.get();
+      __nc    = __h.get();
+      __nc_in = __nc;
     }
     if (__neg)
       *__nc++ = '-';
     for (const char_type* __w = __wb.get(); __w < __wn; ++__w, ++__nc)
       *__nc = __src[std::find(__atoms, std::end(__atoms), *__w) - __atoms];
     *__nc = char();
-    if (sscanf(__nbuf, "%Lf", &__v) != 1)
+    if (sscanf(__nc_in, "%Lf", &__v) != 1)
       __throw_runtime_error("money_get error");
   }
   if (__b == __e)
diff --git a/libcxx/include/locale b/libcxx/include/locale
index 86e327fde5695..a14ce5a9aaf7d 100644
--- a/libcxx/include/locale
+++ b/libcxx/include/locale
@@ -2385,6 +2385,8 @@ _LIBCPP_HIDE_FROM_ABI void __double_or_nothing(unique_ptr<_Tp, void (*)(void*)>&
     std::__throw_bad_alloc();
   if (__owns)
     __b.release();
+  else
+    std::memcpy(__t, __b.get(), __cur_cap);
   __b = unique_ptr<_Tp, void (*)(void*)>(__t, free);
   __new_cap /= sizeof(_Tp);
   __n = __b.get() + __n_off;
@@ -2580,20 +2582,22 @@ _InputIterator money_get<_CharT, _InputIterator>::do_get(
     char_type __atoms[sizeof(__src) - 1];
     __ct.widen(__src, __src + (sizeof(__src) - 1), __atoms);
     char __nbuf[__bz];
-    char* __nc = __nbuf;
+    char* __nc          = __nbuf;
+    const char* __nc_in = __nc;
     unique_ptr<char, void (*)(void*)> __h(nullptr, free);
     if (__wn - __wb.get() > __bz - 2) {
       __h.reset((char*)malloc(static_cast<size_t>(__wn - __wb.get() + 2)));
       if (__h.get() == nullptr)
         std::__throw_bad_alloc();
-      __nc = __h.get();
+      __nc    = __h.get();
+      __nc_in = __nc;
     }
     if (__neg)
       *__nc++ = '-';
     for (const char_type* __w = __wb.get(); __w < __wn; ++__w, ++__nc)
       *__nc = __src[std::find(__atoms, std::end(__atoms), *__w) - __atoms];
     *__nc = char();
-    if (sscanf(__nbuf, "%Lf", &__v) != 1)
+    if (sscanf(__nc_in, "%Lf", &__v) != 1)
       std::__throw_runtime_error("money_get error");
   }
   if (__b == __e)
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp
new file mode 100644
index 0000000000000..5966f03122338
--- /dev/null
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp
@@ -0,0 +1,113 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <locale>
+
+// class money_get<charT, InputIterator>
+
+// iter_type get(iter_type b, iter_type e, bool intl, ios_base& iob,
+//               ios_base::iostate& err, long double& v) const;
+
+#include <cassert>
+#include <cstddef>
+#include <ios>
+#include <locale>
+#include <streambuf>
+#include <string>
+
+#include "test_macros.h"
+#include "test_iterators.h"
+
+typedef std::money_get<char, cpp17_input_iterator<const char*> > Fn;
+
+class my_facet : public Fn {
+public:
+  explicit my_facet(std::size_t refs = 0) : Fn(refs) {}
+};
+
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+typedef std::money_get<wchar_t, cpp17_input_iterator<const wchar_t*> > Fw;
+
+class my_facetw : public Fw {
+public:
+  explicit my_facetw(std::size_t refs = 0) : Fw(refs) {}
+};
+#endif
+
+int main(int, char**) {
+  struct digit_result_case {
+    std::size_t digit;
+    long double result;
+  };
+  const digit_result_case digit_result_cases[] = {
+      {60, 2.0E60L}, {120, 2.0E120L}, {180, 2.0E180L}, {240, 2.0E240L}, {300, 2.0E300L}};
+
+  std::ios ios(0);
+  {
+    const my_facet f(1);
+    for (std::size_t i = 0; i != sizeof(digit_result_cases) / sizeof(digit_result_cases[0]); ++i) {
+      {
+        std::string v = "2";
+        v.append(digit_result_cases[i].digit, '0');
+
+        typedef cpp17_input_iterator<const char*> I;
+        long double ex;
+        std::ios_base::iostate err = std::ios_base::goodbit;
+        I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
+        assert(base(iter) == v.data() + v.size());
+        assert(err == std::ios_base::eofbit);
+        assert(ex == digit_result_cases[i].result);
+      }
+      {
+        std::string v = "-2";
+        v.append(digit_result_cases[i].digit, '0');
+
+        typedef cpp17_input_iterator<const char*> I;
+        long double ex;
+        std::ios_base::iostate err = std::ios_base::goodbit;
+        I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
+        assert(base(iter) == v.data() + v.size());
+        assert(err == std::ios_base::eofbit);
+        assert(ex == -digit_result_cases[i].result);
+      }
+    }
+  }
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  {
+    const my_facetw f(1);
+    for (std::size_t i = 0; i != sizeof(digit_result_cases) / sizeof(digit_result_cases[0]); ++i) {
+      {
+        std::wstring v = L"2";
+        v.append(digit_result_cases[i].digit, L'0');
+
+        typedef cpp17_input_iterator<const wchar_t*> I;
+        long double ex;
+        std::ios_base::iostate err = std::ios_base::goodbit;
+        I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
+        assert(base(iter) == v.data() + v.size());
+        assert(err == std::ios_base::eofbit);
+        assert(ex == digit_result_cases[i].result);
+      }
+      {
+        std::wstring v = L"-2";
+        v.append(digit_result_cases[i].digit, L'0');
+
+        typedef cpp17_input_iterator<const wchar_t*> I;
+        long double ex;
+        std::ios_base::iostate err = std::ios_base::goodbit;
+        I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
+        assert(base(iter) == v.data() + v.size());
+        assert(err == std::ios_base::eofbit);
+        assert(ex == -digit_result_cases[i].result);
+      }
+    }
+  }
+#endif
+
+  return 0;
+}

>From 119ed439d5d66fc6f6ff5ac9caff882c3ea35524 Mon Sep 17 00:00:00 2001
From: "A. Jiang" <de34 at live.cn>
Date: Thu, 27 Feb 2025 18:59:29 +0800
Subject: [PATCH 2/2] Address @mordante's review comments on the test

---
 .../get_long_double_overlong.pass.cpp         | 76 +++++++++----------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp
index 5966f03122338..0b7a38e5104cd 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_overlong.pass.cpp
@@ -13,6 +13,9 @@
 // iter_type get(iter_type b, iter_type e, bool intl, ios_base& iob,
 //               ios_base::iostate& err, long double& v) const;
 
+// Ensure that money_get::do_get works correctly when the input doesn't fit into the stack buffer
+// (100 characters currently).
+
 #include <cassert>
 #include <cstddef>
 #include <ios>
@@ -20,26 +23,21 @@
 #include <streambuf>
 #include <string>
 
+#include "make_string.h"
 #include "test_macros.h"
 #include "test_iterators.h"
 
-typedef std::money_get<char, cpp17_input_iterator<const char*> > Fn;
-
-class my_facet : public Fn {
-public:
-  explicit my_facet(std::size_t refs = 0) : Fn(refs) {}
-};
-
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
-typedef std::money_get<wchar_t, cpp17_input_iterator<const wchar_t*> > Fw;
+template <class CharT>
+class my_basic_facet : public std::money_get<CharT, cpp17_input_iterator<const CharT*> > {
+private:
+  typedef std::money_get<CharT, cpp17_input_iterator<const CharT*> > Base;
 
-class my_facetw : public Fw {
 public:
-  explicit my_facetw(std::size_t refs = 0) : Fw(refs) {}
+  explicit my_basic_facet(std::size_t refs = 0) : Base(refs) {}
 };
-#endif
 
-int main(int, char**) {
+template <class CharT>
+void test() {
   struct digit_result_case {
     std::size_t digit;
     long double result;
@@ -49,13 +47,13 @@ int main(int, char**) {
 
   std::ios ios(0);
   {
-    const my_facet f(1);
+    const my_basic_facet<CharT> f(1);
     for (std::size_t i = 0; i != sizeof(digit_result_cases) / sizeof(digit_result_cases[0]); ++i) {
       {
-        std::string v = "2";
-        v.append(digit_result_cases[i].digit, '0');
+        std::basic_string<CharT> v = MAKE_STRING(CharT, "2");
+        v.append(digit_result_cases[i].digit, static_cast<CharT>('0'));
 
-        typedef cpp17_input_iterator<const char*> I;
+        typedef cpp17_input_iterator<const CharT*> I;
         long double ex;
         std::ios_base::iostate err = std::ios_base::goodbit;
         I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
@@ -64,10 +62,10 @@ int main(int, char**) {
         assert(ex == digit_result_cases[i].result);
       }
       {
-        std::string v = "-2";
-        v.append(digit_result_cases[i].digit, '0');
+        std::basic_string<CharT> v = MAKE_STRING(CharT, "-2");
+        v.append(digit_result_cases[i].digit, static_cast<CharT>('0'));
 
-        typedef cpp17_input_iterator<const char*> I;
+        typedef cpp17_input_iterator<const CharT*> I;
         long double ex;
         std::ios_base::iostate err = std::ios_base::goodbit;
         I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
@@ -75,38 +73,40 @@ int main(int, char**) {
         assert(err == std::ios_base::eofbit);
         assert(ex == -digit_result_cases[i].result);
       }
-    }
-  }
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
-  {
-    const my_facetw f(1);
-    for (std::size_t i = 0; i != sizeof(digit_result_cases) / sizeof(digit_result_cases[0]); ++i) {
       {
-        std::wstring v = L"2";
-        v.append(digit_result_cases[i].digit, L'0');
+        std::basic_string<CharT> v = MAKE_STRING(CharT, "0.");
+        v.append(digit_result_cases[i].digit, static_cast<CharT>('0'));
+        v += MAKE_CSTRING(CharT, "2");
 
-        typedef cpp17_input_iterator<const wchar_t*> I;
+        typedef cpp17_input_iterator<const CharT*> I;
         long double ex;
         std::ios_base::iostate err = std::ios_base::goodbit;
         I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
-        assert(base(iter) == v.data() + v.size());
-        assert(err == std::ios_base::eofbit);
-        assert(ex == digit_result_cases[i].result);
+        assert(base(iter) == v.data() + 1);
+        assert(err == std::ios_base::goodbit);
+        assert(ex == 0.0L);
       }
       {
-        std::wstring v = L"-2";
-        v.append(digit_result_cases[i].digit, L'0');
+        std::basic_string<CharT> v = MAKE_STRING(CharT, "-0.");
+        v.append(digit_result_cases[i].digit, static_cast<CharT>('0'));
+        v += MAKE_CSTRING(CharT, "2");
 
-        typedef cpp17_input_iterator<const wchar_t*> I;
+        typedef cpp17_input_iterator<const CharT*> I;
         long double ex;
         std::ios_base::iostate err = std::ios_base::goodbit;
         I iter                     = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex);
-        assert(base(iter) == v.data() + v.size());
-        assert(err == std::ios_base::eofbit);
-        assert(ex == -digit_result_cases[i].result);
+        assert(base(iter) == v.data() + 2);
+        assert(err == std::ios_base::goodbit);
+        assert(ex == 0.0L);
       }
     }
   }
+}
+
+int main(int, char**) {
+  test<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
 #endif
 
   return 0;



More information about the libcxx-commits mailing list