[libcxx-commits] [libcxx] 9393060 - [libc++] Fixes std::to_chars for bases != 10.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Thu Apr 29 10:56:36 PDT 2021


Author: Mark de Wever
Date: 2021-04-29T19:56:28+02:00
New Revision: 9393060f908b95f30267f2122b3a2aadb698aadb

URL: https://github.com/llvm/llvm-project/commit/9393060f908b95f30267f2122b3a2aadb698aadb
DIFF: https://github.com/llvm/llvm-project/commit/9393060f908b95f30267f2122b3a2aadb698aadb.diff

LOG: [libc++] Fixes std::to_chars for bases != 10.

While working on D70631, Microsoft's unit tests discovered an issue.
Our `std::to_chars` implementation for bases != 10 uses the range
`[first,last)` as temporary buffer. This violates the contract for
to_chars:
[charconv.to.chars]/1 http://eel.is/c++draft/charconv#to.chars-1
`to_chars_result to_chars(char* first, char* last, see below value, int base = 10);`
"If the member ec of the return value is such that the value is equal to
the value of a value-initialized errc, the conversion was successful and
the member ptr is the one-past-the-end pointer of the characters
written."

Our implementation modifies the range `[member ptr, last)`, which causes
Microsoft's test to fail. Their test verifies the buffer
`[member ptr, last)` is unchanged. (The test is only done when the
conversion is successful.)

While looking at the code I noticed the performance for bases != 10 also
is suboptimal. This is tracked in D97705.

This patch fixes the issue and adds a benchmark. This benchmark will be
used as baseline for D97705.

Reviewed By: #libc, Quuxplusone, zoecarver

Differential Revision: https://reviews.llvm.org/D100722

Added: 
    libcxx/benchmarks/to_chars.bench.cpp

Modified: 
    libcxx/include/charconv
    libcxx/test/support/charconv_test_helpers.h

Removed: 
    


################################################################################
diff  --git a/libcxx/benchmarks/to_chars.bench.cpp b/libcxx/benchmarks/to_chars.bench.cpp
new file mode 100644
index 0000000000000..1a3dc64497ba7
--- /dev/null
+++ b/libcxx/benchmarks/to_chars.bench.cpp
@@ -0,0 +1,58 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <array>
+#include <charconv>
+#include <random>
+
+#include "benchmark/benchmark.h"
+#include "test_macros.h"
+
+static const std::array<unsigned, 1000> input = [] {
+  std::mt19937 generator;
+  std::uniform_int_distribution<unsigned> distribution(0, std::numeric_limits<unsigned>::max());
+  std::array<unsigned, 1000> result;
+  std::generate_n(result.begin(), result.size(), [&] { return distribution(generator); });
+  return result;
+}();
+
+static void BM_to_chars_good(benchmark::State& state) {
+  char buffer[128];
+  int base = state.range(0);
+  while (state.KeepRunningBatch(input.size()))
+    for (auto value : input)
+      benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[128], value, base));
+}
+BENCHMARK(BM_to_chars_good)->DenseRange(2, 36, 1);
+
+static void BM_to_chars_bad(benchmark::State& state) {
+  char buffer[128];
+  int base = state.range(0);
+  struct sample {
+    unsigned size;
+    unsigned value;
+  };
+  std::array<sample, 1000> data;
+  // Assume the failure occurs, on average, halfway during the conversion.
+  std::transform(input.begin(), input.end(), data.begin(), [&](unsigned value) {
+    std::to_chars_result result = std::to_chars(buffer, &buffer[128], value, base);
+    return sample{unsigned((result.ptr - buffer) / 2), value};
+  });
+
+  while (state.KeepRunningBatch(data.size()))
+    for (auto element : data)
+      benchmark::DoNotOptimize(std::to_chars(buffer, &buffer[element.size], element.value, base));
+}
+BENCHMARK(BM_to_chars_bad)->DenseRange(2, 36, 1);
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+
+  benchmark::RunSpecifiedBenchmarks();
+}

diff  --git a/libcxx/include/charconv b/libcxx/include/charconv
index 8113552193478..64a2d03520205 100644
--- a/libcxx/include/charconv
+++ b/libcxx/include/charconv
@@ -79,6 +79,7 @@ namespace std {
 #include <__utility/to_underlying.h>
 #include <cmath> // for log2f
 #include <cstdint>
+#include <cstdlib> // for _LIBCPP_UNREACHABLE
 #include <cstring>
 #include <limits>
 #include <type_traits>
@@ -400,33 +401,54 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
     return __to_chars_integral(__first, __last, __x, __base, false_type());
 }
 
+template <typename _Tp>
+_LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_INLINE_VISIBILITY int __to_chars_integral_width(_Tp __value, unsigned __base) {
+  _LIBCPP_ASSERT(__value >= 0, "The function requires a non-negative value.");
+
+  unsigned __base_2 = __base * __base;
+  unsigned __base_3 = __base_2 * __base;
+  unsigned __base_4 = __base_2 * __base_2;
+
+  int __r = 0;
+  while (true) {
+    if (__value < __base)
+      return __r + 1;
+    if (__value < __base_2)
+      return __r + 2;
+    if (__value < __base_3)
+      return __r + 3;
+    if (__value < __base_4)
+      return __r + 4;
+
+    __value /= __base_4;
+    __r += 4;
+  }
+
+  _LIBCPP_UNREACHABLE();
+}
+
 template <typename _Tp>
 _LIBCPP_AVAILABILITY_TO_CHARS
 inline _LIBCPP_INLINE_VISIBILITY to_chars_result
 __to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
                     false_type)
 {
-    if (__base == 10)
-        return __to_chars_itoa(__first, __last, __value, false_type());
-
-    auto __p = __last;
-    while (__p != __first)
-    {
-        auto __c = __value % __base;
-        __value /= __base;
-        *--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
-        if (__value == 0)
-            break;
-    }
-
-    auto __len = __last - __p;
-    if (__value != 0 || !__len)
-        return {__last, errc::value_too_large};
-    else
-    {
-        _VSTD::memmove(__first, __p, __len);
-        return {__first + __len, {}};
-    }
+  if (__base == 10)
+    return __to_chars_itoa(__first, __last, __value, false_type());
+
+  ptr
diff _t __cap = __last - __first;
+  int __n = __to_chars_integral_width(__value, __base);
+  if (__n > __cap)
+    return {__last, errc::value_too_large};
+
+  __last = __first + __n;
+  char* __p = __last;
+  do {
+    unsigned __c = __value % __base;
+    __value /= __base;
+    *--__p = "0123456789abcdefghijklmnopqrstuvwxyz"[__c];
+  } while (__value != 0);
+  return {__last, errc(0)};
 }
 
 template <typename _Tp, typename enable_if<is_integral<_Tp>::value, int>::type = 0>

diff  --git a/libcxx/test/support/charconv_test_helpers.h b/libcxx/test/support/charconv_test_helpers.h
index 51e6a56b8a8e9..bbce485976b13 100644
--- a/libcxx/test/support/charconv_test_helpers.h
+++ b/libcxx/test/support/charconv_test_helpers.h
@@ -12,6 +12,7 @@
 #include <charconv>
 #include <cassert>
 #include <limits>
+#include <numeric>
 #include <string.h>
 #include <stdlib.h>
 
@@ -105,8 +106,13 @@ struct to_chars_test_base
         using std::to_chars;
         std::to_chars_result r;
 
+        // Poison the buffer for testing whether a successful std::to_chars
+        // doesn't modify data beyond r.ptr.
+        std::iota(buf, buf + sizeof(buf), 1);
         r = to_chars(buf, buf + sizeof(buf), v, args...);
         assert(r.ec == std::errc{});
+        for (size_t i = r.ptr - buf; i < sizeof(buf); ++i)
+            assert(buf[i] == static_cast<char>(i + 1));
         *r.ptr = '\0';
 
         auto a = fromchars(buf, r.ptr, args...);


        


More information about the libcxx-commits mailing list