[libcxx-commits] [libcxx] [libc++][test] Adds transcode option. (PR #73395)

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Tue Dec 5 10:03:31 PST 2023


================
@@ -16,15 +16,153 @@
 #include "test_macros.h"
 
 #ifndef TEST_HAS_NO_LOCALIZATION
+#  include <concepts>
+#  include <iterator>
 #  include <sstream>
 #endif
 
 #if TEST_STD_VER > 17
 
 #  ifndef TEST_HAS_NO_LOCALIZATION
+
+// True when value lies in the inclusive range 0xd800..0xdbff (UTF-16 high/leading surrogates).
+[[nodiscard]] constexpr bool test_is_high_surrogate(char32_t value) { return 0xd800 <= value && value <= 0xdbff; }
+
+// True when value lies in the inclusive range 0xdc00..0xdfff (UTF-16 low/trailing surrogates).
+[[nodiscard]] constexpr bool test_is_low_surrogate(char32_t value) { return 0xdc00 <= value && value <= 0xdfff; }
+
+// True when value lies in the inclusive range 0xd800..0xdfff, i.e. it is a high or a low surrogate.
+[[nodiscard]] constexpr bool test_is_surrogate(char32_t value) { return 0xd800 <= value && value <= 0xdfff; }
+
+// True when value does not exceed 0x10ffff, the largest Unicode code point.
+[[nodiscard]] constexpr bool test_is_code_point(char32_t value) { return value < 0x110000; }
+
+// A scalar value is a code point that is not a surrogate.
+[[nodiscard]] constexpr bool test_is_scalar_value(char32_t value) {
+  return !test_is_surrogate(value) && test_is_code_point(value);
+}
+
+// U+FFFD; the UTF-16 and UTF-32 test_transcode overloads encode it in place of input they reject.
+inline constexpr char32_t test_replacement_character = U'\ufffd';
+
+// Unconstrained test_transcode declaration, explicitly deleted. The usable overloads are the
+// constrained (first, last, out_it) templates, each selected by the input iterator's value type.
+template <class InIt, class OutIt>
+OutIt test_transcode() = delete;
+
+// char8_t input: every code unit in [first, last) is written to out_it unchanged.
+// Returns the output iterator positioned after the last unit written.
+template <class InIt, class OutIt>
+  requires(std::output_iterator<OutIt, const char&> && std::same_as<std::iter_value_t<InIt>, char8_t>)
+OutIt test_transcode(InIt first, InIt last, OutIt out_it) {
+  for (; first != last; ++first)
+    *out_it++ = *first;
+
+  return out_it;
+}
+
+// Writes the UTF-8 encoding of a 16-bit value to out_it (one, two or three bytes) and advances out_it.
+// Surrogates get no special treatment here; callers filter them before encoding.
+template <class OutIt>
+  requires std::output_iterator<OutIt, const char&>
+void test_encode(OutIt& out_it, char16_t value) {
+  if (value < 0x80) {
+    // 0xxxxxxx
+    *out_it++ = value;
+    return;
+  }
+
+  if (value >= 0x800) {
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    *out_it++ = 0xe0 | (value >> 12);
+    *out_it++ = 0x80 | ((value >> 6) & 0x3f);
+    *out_it++ = 0x80 | (value & 0x3f);
+    return;
+  }
+
+  // 110xxxxx 10xxxxxx
+  *out_it++ = 0xc0 | (value >> 6);
+  *out_it++ = 0x80 | (value & 0x3f);
+}
+
+// Writes the UTF-8 encoding of value to out_it and advances out_it.
+// Values that fit in 16 bits are forwarded to the char16_t overload; anything larger is
+// written as a four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+// Both test_transcode callers check the value before calling, so it is at most 0x10ffff here.
+template <class OutIt>
+  requires std::output_iterator<OutIt, const char&>
+void test_encode(OutIt& out_it, char32_t value) {
+  if ((value & 0xffff0000) == 0)
+    test_encode(out_it, static_cast<char16_t>(value));
+  else {
+    // The lead byte of a four-byte sequence is 0b11110xxx. 0b1110xxxx would announce a
+    // three-byte sequence and make the output invalid UTF-8.
+    *out_it++ = 0b11110000 | ((value >> 18) & 0b00000111);
+    *out_it++ = 0b10000000 | ((value >> 12) & 0b00111111);
+    *out_it++ = 0b10000000 | ((value >> 6) & 0b00111111);
+    *out_it++ = 0b10000000 | (value & 0b00111111);
+  }
+}
+
+// Transcodes the UTF-16 code units in [first, last) to UTF-8, writing the bytes to out_it.
+// Selected for char16_t input and, unless TEST_HAS_NO_WIDE_CHARACTERS is defined, for a
+// two-byte wchar_t. An unpaired surrogate is replaced by test_replacement_character.
+// Returns the output iterator positioned after the last byte written.
+//
+// The parentheses of the requires-clause are closed outside the #ifndef so the clause is
+// well-formed whether or not wide characters are available.
+template <class InIt, class OutIt>
+  requires(std::output_iterator<OutIt, const char&> &&
+           (std::same_as<std::iter_value_t<InIt>, char16_t>
+#    ifndef TEST_HAS_NO_WIDE_CHARACTERS
+            || (std::same_as<std::iter_value_t<InIt>, wchar_t> && sizeof(wchar_t) == 2)
+#    endif
+            ))
+OutIt test_transcode(InIt first, InIt last, OutIt out_it) {
+  while (first != last) {
+    char32_t value = *first++;
+
+    // A low surrogate with no preceding high surrogate is ill-formed.
+    if (test_is_low_surrogate(value)) [[unlikely]] {
+      test_encode(out_it, static_cast<char16_t>(test_replacement_character));
+      continue;
+    }
+
+    // Anything that is not a surrogate is a complete code point by itself.
+    if (!test_is_high_surrogate(value)) {
+      test_encode(out_it, static_cast<char16_t>(value));
+      continue;
+    }
+
+    // A high surrogate must be followed by a low surrogate. When it is not, only the high
+    // surrogate is replaced; the following unit (if any) is handled by the next iteration.
+    if (first == last || !test_is_low_surrogate(static_cast<char32_t>(*first))) [[unlikely]] {
+      test_encode(out_it, static_cast<char16_t>(test_replacement_character));
+      continue;
+    }
+
+    // Combine the surrogate pair into a supplementary-plane code point.
+    value -= 0xd800;
+    value <<= 10;
+    value += static_cast<char32_t>(*first++) - 0xdc00;
+    value += 0x10000;
+
+    // A surrogate pair cannot yield more than 0x10ffff, so this check is purely defensive.
+    if (test_is_code_point(value)) [[likely]]
+      test_encode(out_it, value);
+    else
+      test_encode(out_it, static_cast<char16_t>(test_replacement_character));
+  }
+
+  return out_it;
+}
+
+// Transcodes the UTF-32 code units in [first, last) to UTF-8, writing the bytes to out_it.
+// Selected for char32_t input and, unless TEST_HAS_NO_WIDE_CHARACTERS is defined, for a
+// four-byte wchar_t. Units that are not Unicode scalar values (surrogates, or values above
+// 0x10ffff) are replaced by test_replacement_character, matching how the UTF-16 overload
+// treats unpaired surrogates. Returns the output iterator positioned after the last byte written.
+//
+// The `||` and the closing parentheses are placed so that the requires-clause is well-formed
+// whether or not wide characters are available.
+template <class InIt, class OutIt>
+  requires(std::output_iterator<OutIt, const char&> &&
+           (std::same_as<std::iter_value_t<InIt>, char32_t>
+#    ifndef TEST_HAS_NO_WIDE_CHARACTERS
+            || (std::same_as<std::iter_value_t<InIt>, wchar_t> && sizeof(wchar_t) == 4)
+#    endif
+            ))
+OutIt test_transcode(InIt first, InIt last, OutIt out_it) {
+  while (first != last) {
+    char32_t value = *first++;
+    if (test_is_scalar_value(value)) [[likely]]
+      test_encode(out_it, value);
+    else
+      test_encode(out_it, static_cast<char16_t>(test_replacement_character));
+  }
+  return out_it;
+}
+
+// Satisfied when `stream << value` is a valid expression for a std::stringstream lvalue and
+// an lvalue of the type named by T&&.
 template <class T>
-concept test_char_streamable = requires(T&& value) { std::stringstream{} << std::forward<T>(value); };
-#  endif
+concept test_streamable = requires(std::stringstream& stream, T&& value) { stream << value; };
+
+// Satisfied by types that are not test_streamable but for which a std::basic_string_view can be
+// constructed (template arguments deduced) from std::begin(value) and std::end(value).
+template <class T>
+concept test_convertable = (!test_streamable<T> && requires(T&& value) {
+  std::basic_string_view{std::begin(value), std::end(value)};
+});
----------------
ldionne wrote:

Since `value` is basically some kind of range, perhaps we should call it `rng` or `range` or something like that. And `T` should probably be `Range`.

https://github.com/llvm/llvm-project/pull/73395


More information about the libcxx-commits mailing list