[llvm-branch-commits] [libc] [libc][wctype] Upstream common utils header file from PtrHash-cc prototype to LLVM libc (PR #174798)
Muhammad Bassiouni via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 7 10:24:44 PST 2026
https://github.com/bassiounix updated https://github.com/llvm/llvm-project/pull/174798
>From 6a86e8fda01ed644b152f449cd00b6affb94c968 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 7 Jan 2026 18:30:08 +0200
Subject: [PATCH 1/3] [libc][wctype] Upstream common utils header file from
PtrHash-cc prototype to LLVM libc
---
.../wctype/conversion/utils/CMakeLists.txt | 12 +
.../__support/wctype/conversion/utils/utils.h | 212 ++++++++++++++++++
2 files changed, 224 insertions(+)
create mode 100644 libc/src/__support/wctype/conversion/utils/utils.h
diff --git a/libc/src/__support/wctype/conversion/utils/CMakeLists.txt b/libc/src/__support/wctype/conversion/utils/CMakeLists.txt
index 9e119f38641ff..2dcf45e4a0105 100644
--- a/libc/src/__support/wctype/conversion/utils/CMakeLists.txt
+++ b/libc/src/__support/wctype/conversion/utils/CMakeLists.txt
@@ -10,6 +10,18 @@ add_header_library(
libc.src.__support.libc_assert
)
+# Header-only target for utils.h; the DEPENDS entries mirror the headers that
+# utils.h includes.
+add_header_library(
+ utils
+ HDRS
+ utils.h
+ DEPENDS
+ .slice
+ libc.hdr.stdint_proxy
+ libc.src.__support.common
+ libc.src.__support.libc_assert
+ libc.src.__support.uint128
+)
+
add_header_library(
zip
HDRS
diff --git a/libc/src/__support/wctype/conversion/utils/utils.h b/libc/src/__support/wctype/conversion/utils/utils.h
new file mode 100644
index 0000000000000..f91db19948e81
--- /dev/null
+++ b/libc/src/__support/wctype/conversion/utils/utils.h
@@ -0,0 +1,212 @@
+//===-- Internal utils for wctype conversion code - common ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCTYPE_CONVERSION_UTILS_UTILS_H
+#define LLVM_LIBC_SRC___SUPPORT_WCTYPE_CONVERSION_UTILS_UTILS_H
+
+#include "hdr/stdint_proxy.h"
+#include "slice.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/uint128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace internal_wctype_conversion_utils {
+
+// mul_high: both operands are cast to UInt128 (presumably a 128-bit unsigned
+// type) before the multiplication, and the product is then shifted right by
+// 64 so that only its upper half is returned.
+// Multiplies two 64-bit unsigned integers and returns the high 64 bits
+LIBC_INLINE constexpr uint64_t mul_high(uint64_t a, uint64_t b) {
+ return (static_cast<UInt128>(a) * static_cast<UInt128>(b)) >> 64;
+}
+
+// Divides a by b and rounds the result toward +infinity.
+// b must be non-zero; this is checked with LIBC_ASSERT.
+template <typename T> LIBC_INLINE static constexpr T div_ceil(T a, T b) {
+  LIBC_ASSERT(b != 0);
+
+  const T truncated = a / b;
+  const bool is_exact = (a % b) == 0;
+  const bool same_sign = (a > 0) == (b > 0);
+
+  // Integer division truncates toward zero. That is already the ceiling
+  // unless the true quotient is positive and has a fractional part.
+  if (is_exact || !same_sign)
+    return truncated;
+
+  return truncated + 1;
+}
+
+// Returns true when number has exactly one bit set (zero does not qualify).
+template <typename T>
+LIBC_INLINE static constexpr bool is_power_of_two(T number) {
+  static_assert(cpp::is_unsigned_v<T>,
+                "is_power_of_two requires unsigned type");
+  if (number == 0)
+    return false;
+  // Clearing the lowest set bit leaves zero only if it was the sole set bit.
+  return (number & (number - 1)) == 0;
+}
+
+// Signed counterpart of is_power_of_two: zero and negative values are never
+// reported as powers of two.
+template <typename T>
+LIBC_INLINE static constexpr bool is_power_of_two_signed(T number) {
+  static_assert(cpp::is_signed_v<T>,
+                "is_power_of_two_signed requires signed type");
+  if (number <= 0)
+    return false;
+  // Clearing the lowest set bit leaves zero only if it was the sole set bit.
+  return (number & (number - 1)) == 0;
+}
+
+// Builds a new array by applying func to every element of container, in
+// index order. The result's element type is whatever func returns for a T.
+template <typename T, typename Fn, size_t N>
+LIBC_INLINE static constexpr auto map(const cpp::array<T, N> &container,
+                                      Fn func) {
+  using Mapped = cpp::invoke_result_t<Fn, T>;
+  cpp::array<Mapped, N> mapped{};
+
+  size_t index = 0;
+  while (index < N) {
+    mapped[index] = func(container[index]);
+    ++index;
+  }
+
+  return mapped;
+}
+
+// Calls func on each element of iter in order and stops at the first element
+// for which func returns a false value. Returns false if it stopped early and
+// true if every element was visited.
+template <typename Iterator, typename Fn>
+LIBC_INLINE static constexpr auto try_for_each(Iterator &&iter, Fn &&func) {
+  for (auto &&element : iter) {
+    if (func(element))
+      continue;
+    return false; // func asked to stop
+  }
+
+  return true;
+}
+
+// Sums up all elements in a container. Alternative to accumulate.
+//
+// The running total is kept in the element type itself, not in size_t, so
+// element types wider than size_t or floating-point element types are not
+// truncated on the way. An empty container yields a value-initialized T.
+template <typename T> LIBC_INLINE static constexpr T sum(Slice<T> container) {
+  // `auto` drops a top-level const on T so the total stays assignable.
+  auto total = T{};
+
+  for (const T &item : container)
+    total = static_cast<T>(total + item);
+
+  return total;
+}
+
+// Wrapping addition for integral types: returns a + b reduced modulo 2^bits
+// of T.
+//
+// The addition is carried out in the unsigned counterpart of T, where
+// overflow is well defined, instead of shifting carries bit by bit (left
+// shifting a negative carry is undefined before C++20). bool is not
+// supported because it has no unsigned counterpart.
+template <typename T> LIBC_INLINE static constexpr T wrapping_add(T a, T b) {
+  static_assert(cpp::is_integral_v<T>, "wrapping_add requires integral type");
+
+  using U = cpp::make_unsigned_t<T>;
+  // The inner cast to U discards bits gained through integer promotion of
+  // types narrower than int before converting back to T.
+  return static_cast<T>(
+      static_cast<U>(static_cast<U>(a) + static_cast<U>(b)));
+}
+
+// Wrapping multiplication for integral types: returns a * b reduced modulo
+// 2^bits of T.
+//
+// Shift-and-add is done entirely in the unsigned counterpart of T so that
+// neither the partial sums nor the doubling of the multiplicand can overflow
+// a signed type. A plain multiplication is avoided because types narrower
+// than int are promoted to signed int, where the product could overflow.
+template <typename T> LIBC_INLINE static constexpr T wrapping_mul(T a, T b) {
+  static_assert(cpp::is_integral_v<T>, "wrapping_mul requires integral type");
+
+  using U = cpp::make_unsigned_t<T>;
+  U multiplicand = static_cast<U>(a);
+  U multiplier = static_cast<U>(b);
+  U product = 0;
+
+  while (multiplier != 0) {
+    if ((multiplier & 1) != 0)
+      product = static_cast<U>(product + multiplicand);
+    multiplicand = static_cast<U>(multiplicand << 1);
+    multiplier = static_cast<U>(multiplier >> 1);
+  }
+
+  return static_cast<T>(product);
+}
+
+// Counts the elements of container that evaluate to false (`!element`).
+// The array is only read, so it is taken by const reference; both const and
+// non-const arrays can be passed.
+template <typename T, size_t N>
+LIBC_INLINE static constexpr auto
+count_zeros(const cpp::array<T, N> &container) {
+  size_t zeros = 0;
+
+  for (const auto &element : container) {
+    if (!element)
+      ++zeros;
+  }
+
+  return zeros;
+}
+
+// Rotates the bits of number to the right by rotation positions. rotation is
+// reduced modulo the bit width of T, so any value is accepted.
+template <typename T>
+LIBC_INLINE static constexpr T rotate_right(T number, size_t rotation) {
+  static_assert(cpp::is_unsigned_v<T>, "rotate_right requires unsigned type");
+
+  constexpr size_t BITS = cpp::numeric_limits<T>::digits;
+  const size_t shift = rotation % BITS;
+
+  // A zero shift must not reach the expression below: it would left-shift by
+  // BITS, and shifting by the full width of the type is undefined behavior.
+  if (shift == 0)
+    return number;
+
+  return static_cast<T>((number >> shift) | (number << (BITS - shift)));
+}
+
+// Splits a 32-bit unsigned integer into its 4 bytes, least significant byte
+// first (index 0 holds bits 0-7, index 3 holds bits 24-31).
+LIBC_INLINE static constexpr cpp::array<uint8_t, 4>
+to_le_bytes(uint32_t number) {
+  cpp::array<uint8_t, 4> bytes{};
+
+  for (size_t i = 0; i < 4; ++i)
+    bytes[i] = static_cast<uint8_t>(number >> (8 * i));
+
+  return bytes;
+}
+
+// Reads sizeof(To) bytes starting at `from` and returns them as a To. Unlike
+// cpp::bit_cast, the bytes are copied one at a time into a local,
+// value-initialized To.
+// NOTE(review): reinterpret_cast is not allowed in constant expressions, so
+// the constexpr specifier presumably never takes effect here - confirm.
+template <typename To, typename From>
+LIBC_INLINE static constexpr To ptr_bit_cast(From *from) {
+  To result{};
+  const char *source_bytes = reinterpret_cast<const char *>(from);
+  char *result_bytes = reinterpret_cast<char *>(&result);
+
+  for (size_t offset = 0; offset != sizeof(To); ++offset)
+    result_bytes[offset] = source_bytes[offset];
+
+  return result;
+}
+
+// Sorts an array using merge sort algorithm
+//
+// Returns a sorted copy in ascending order (by operator<=); arr itself is
+// left untouched. Equal elements keep their relative order because the merge
+// prefers the left half on ties.
+template <typename T, size_t N>
+LIBC_INLINE static constexpr auto array_sort(cpp::array<T, N> &arr) {
+  // Arrays of length 0 or 1 are already sorted, so the copy is the answer.
+  cpp::array<T, N> result = arr;
+
+  // The split/merge code lives inside `if constexpr` so that it is discarded
+  // for N <= 1. Otherwise it would still be instantiated there and create
+  // zero-length halves.
+  if constexpr (N > 1) {
+    constexpr size_t MID = N / 2;
+    constexpr size_t REST = N - MID;
+
+    // Split array into left and right halves
+    cpp::array<T, MID> left{};
+    cpp::array<T, REST> right{};
+
+    for (size_t i = 0; i < MID; ++i)
+      left[i] = arr[i];
+    for (size_t i = 0; i < REST; ++i)
+      right[i] = arr[MID + i];
+
+    // Recursively sort each half
+    left = array_sort(left);
+    right = array_sort(right);
+
+    // Merge halves
+    size_t li = 0, ri = 0, ki = 0;
+
+    while (li < MID && ri < REST)
+      result[ki++] = (left[li] <= right[ri]) ? left[li++] : right[ri++];
+    while (li < MID)
+      result[ki++] = left[li++];
+    while (ri < REST)
+      result[ki++] = right[ri++];
+  }
+
+  return result;
+}
+
+} // namespace internal_wctype_conversion_utils
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCTYPE_CONVERSION_UTILS_UTILS_H
>From d1322c8e218b22154ead2f7e2727ba2b4d134925 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 7 Jan 2026 19:41:33 +0200
Subject: [PATCH 2/3] include range and chunk utils
---
.../__support/wctype/conversion/utils/utils.h | 167 +++++++++++++++++-
1 file changed, 166 insertions(+), 1 deletion(-)
diff --git a/libc/src/__support/wctype/conversion/utils/utils.h b/libc/src/__support/wctype/conversion/utils/utils.h
index f91db19948e81..130bba629bb8e 100644
--- a/libc/src/__support/wctype/conversion/utils/utils.h
+++ b/libc/src/__support/wctype/conversion/utils/utils.h
@@ -20,7 +20,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal_wctype_conversion_utils {
// Multiplies two 64-bit unsigned integers and returns the high 64 bits
-LIBC_INLINE constexpr uint64_t mul_high(uint64_t a, uint64_t b) {
+LIBC_INLINE static constexpr uint64_t mul_high(uint64_t a, uint64_t b) {
return (static_cast<UInt128>(a) * static_cast<UInt128>(b)) >> 64;
}
@@ -205,6 +205,171 @@ LIBC_INLINE static constexpr auto array_sort(cpp::array<T, N> &arr) {
return result;
}
+// Half-open integer range [start, end) walked in increments of step, usable
+// in range-based for loops. A negative step walks downwards from start while
+// the value stays above end. step must not be zero.
+struct Range {
+  int start_range, end_range, step_range;
+
+  struct Iterator {
+    // Mutable because operator++ and operator* are const member functions.
+    mutable int value;
+    mutable int step;
+
+    LIBC_INLINE constexpr int &operator*() const { return value; }
+
+    LIBC_INLINE constexpr const Iterator &operator++() const {
+      value += step;
+      return *this;
+    }
+
+    // Tests "has not passed other yet" instead of plain inequality, so a
+    // step that jumps over other.value still ends the loop.
+    LIBC_INLINE constexpr bool operator!=(const Iterator &other) const {
+      return step > 0 ? value < other.value : value > other.value;
+    }
+  };
+
+  LIBC_INLINE constexpr Iterator begin() const {
+    return {start_range, step_range};
+  }
+  LIBC_INLINE constexpr Iterator end() const { return {end_range, step_range}; }
+
+  // Returns a range that visits the same values in the opposite order.
+  // The element count comes from size(), which handles both step directions
+  // and empty ranges; reversing an empty range gives an empty range.
+  LIBC_INLINE constexpr auto rev() const {
+    const int count = static_cast<int>(size());
+    const int new_start = start_range + (count - 1) * step_range;
+    const int new_end = start_range - step_range;
+    return Range(new_start, new_end, -step_range);
+  }
+
+  LIBC_INLINE constexpr Range(int start, int end, int step = 1)
+      : start_range(start), end_range(end), step_range(step) {
+    LIBC_ASSERT(step != 0);
+  }
+
+  LIBC_INLINE constexpr Range(int end)
+      : start_range(0), end_range(end), step_range(1) {}
+
+  // Number of values the range visits. Ranges whose start is already at or
+  // past end (in the direction of step) are empty and report 0 instead of a
+  // negative count converted to size_t.
+  LIBC_INLINE constexpr size_t size() const {
+    if (step_range > 0) {
+      if (end_range <= start_range)
+        return 0;
+      return static_cast<size_t>((end_range - start_range + step_range - 1) /
+                                 step_range);
+    }
+    // A zero step is rejected by the constructor assertion; returning 0 here
+    // keeps builds without assertions from dividing by zero.
+    if (step_range == 0 || start_range <= end_range)
+      return 0;
+    return static_cast<size_t>((start_range - end_range - step_range - 1) /
+                               (-step_range));
+  }
+};
+
+// Mutable view over a cpp::array that presents it as consecutive chunks of
+// chunk_size elements. When the array length is not a multiple of chunk_size
+// the last chunk is shorter. chunk_size must not be zero.
+template <typename T, size_t N> class ChunksMut {
+public:
+  // One chunk: the elements [chunk_begin, chunk_end) of the underlying array.
+  struct Chunk {
+    cpp::array<T, N> &arr;
+    size_t chunk_begin;
+    size_t chunk_end;
+
+    // Unchecked access to the i-th element of the chunk.
+    LIBC_INLINE constexpr T &operator[](size_t i) const {
+      return arr[chunk_begin + i];
+    }
+
+    // Same as operator[], but asserts that i lies inside the chunk.
+    LIBC_INLINE constexpr T &at(size_t i) const {
+      LIBC_ASSERT(chunk_begin + i < chunk_end);
+      return arr[chunk_begin + i];
+    }
+
+    LIBC_INLINE constexpr T &front() const { return arr[chunk_begin]; }
+
+    LIBC_INLINE constexpr T &back() const { return arr[chunk_end - 1]; }
+
+    LIBC_INLINE constexpr size_t size() const noexcept {
+      return chunk_end - chunk_begin;
+    }
+
+    LIBC_INLINE constexpr bool empty() const noexcept {
+      return chunk_begin == chunk_end;
+    }
+
+    LIBC_INLINE constexpr auto begin_it() const {
+      return arr.begin() + static_cast<ptrdiff_t>(chunk_begin);
+    }
+    LIBC_INLINE constexpr auto end_it() const {
+      return arr.begin() + static_cast<ptrdiff_t>(chunk_end);
+    }
+
+    LIBC_INLINE constexpr auto begin() const { return begin_it(); }
+
+    LIBC_INLINE constexpr auto end() const { return end_it(); }
+
+    LIBC_INLINE constexpr T *data() const { return arr.data() + chunk_begin; }
+  };
+
+  // Iterator over the chunks; dereferencing yields a Chunk by value.
+  class Iterator {
+  public:
+    LIBC_INLINE constexpr Iterator(cpp::array<T, N> &arr, size_t pos,
+                                   size_t chunk)
+        : arr(arr), index(pos), chunk_size(chunk) {}
+
+    LIBC_INLINE constexpr Chunk operator*() const {
+      size_t end = cpp::min(index + chunk_size, arr.size());
+      return Chunk{arr, index, end};
+    }
+
+    // Advances to the next chunk. The position is clamped to arr.size():
+    // without the clamp, an array whose length is not a multiple of
+    // chunk_size would step over the end position, `it != end()` would never
+    // become false, and the loop would run past the array.
+    LIBC_INLINE constexpr const Iterator &operator++() const {
+      index = cpp::min(index + chunk_size, arr.size());
+      return *this;
+    }
+
+    LIBC_INLINE constexpr bool operator!=(const Iterator &other) const {
+      return index != other.index;
+    }
+
+  private:
+    cpp::array<T, N> &arr;
+    // Mutable because operator++ is a const member function.
+    mutable size_t index;
+    mutable size_t chunk_size;
+  };
+
+  LIBC_INLINE constexpr ChunksMut(cpp::array<T, N> &v, size_t chunk)
+      : arr(v), chunk_size(chunk) {
+    // A zero chunk size would divide by zero in size() and never advance the
+    // iterator.
+    LIBC_ASSERT(chunk != 0);
+  }
+
+  /// number of chunks
+  LIBC_INLINE constexpr size_t size() const {
+    return (arr.size() + chunk_size - 1) / chunk_size;
+  }
+
+  LIBC_INLINE constexpr bool empty() const { return arr.empty(); }
+
+  // Returns the chunk with the given index; the last chunk may be shorter
+  // than chunk_size. chunk_index is expected to be below size().
+  LIBC_INLINE constexpr Chunk operator[](size_t chunk_index) const {
+    LIBC_ASSERT(chunk_index < size());
+
+    size_t begin = chunk_index * chunk_size;
+    size_t end = cpp::min(begin + chunk_size, arr.size());
+
+    return Chunk{arr, begin, end};
+  }
+
+  LIBC_INLINE constexpr Iterator begin() const {
+    return Iterator(arr, 0, chunk_size);
+  }
+  LIBC_INLINE constexpr Iterator end() const {
+    return Iterator(arr, arr.size(), chunk_size);
+  }
+
+private:
+  cpp::array<T, N> &arr;
+  size_t chunk_size;
+};
+
+// Convenience factory: wraps arr in a ChunksMut view with the given chunk
+// size, letting T and N be deduced from the argument.
+template <typename T, size_t N>
+LIBC_INLINE static constexpr ChunksMut<T, N> chunks_mut(cpp::array<T, N> &arr,
+                                                        size_t chunk_size) {
+  ChunksMut<T, N> view(arr, chunk_size);
+  return view;
+}
+
+// Walks pilots in consecutive sub-chunks of exactly buckets_ elements; a
+// trailing remainder shorter than buckets_ is not visited.
+// NOTE(review): each sub-chunk is only constructed and then dropped, so the
+// function currently has no observable effect and returns nothing. It looks
+// like a placeholder for code that will consume the sub-chunks - confirm.
+template <size_t buckets_total_, size_t buckets_>
+LIBC_INLINE static constexpr auto
+chunks_exact_mut(typename ChunksMut<uint8_t, buckets_total_>::Chunk &pilots) {
+  static_assert(buckets_ != 0,
+                "chunks_exact_mut requires a non-zero chunk length");
+  using Chunk = typename ChunksMut<uint8_t, buckets_total_>::Chunk;
+
+  const size_t num_chunks = pilots.size() / buckets_;
+
+  for (size_t i = 0; i < num_chunks; ++i) {
+    // Chunk exposes its members as arr / chunk_begin / chunk_end.
+    const size_t begin = pilots.chunk_begin + i * buckets_;
+    const size_t end = cpp::min(begin + buckets_, pilots.arr.size());
+    [[maybe_unused]] Chunk target_pilots{pilots.arr, begin, end};
+  }
+}
+
} // namespace internal_wctype_conversion_utils
} // namespace LIBC_NAMESPACE_DECL
>From 6f9364a1cb7404f09f5fd6a20cadafd55a492cb7 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 7 Jan 2026 20:22:48 +0200
Subject: [PATCH 3/3] fix nesting
---
libc/src/__support/wctype/conversion/utils/utils.h | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/wctype/conversion/utils/utils.h b/libc/src/__support/wctype/conversion/utils/utils.h
index 130bba629bb8e..7738d8c166a43 100644
--- a/libc/src/__support/wctype/conversion/utils/utils.h
+++ b/libc/src/__support/wctype/conversion/utils/utils.h
@@ -17,7 +17,9 @@
namespace LIBC_NAMESPACE_DECL {
-namespace internal_wctype_conversion_utils {
+namespace wctype_internal {
+
+namespace conversion_utils {
// Multiplies two 64-bit unsigned integers and returns the high 64 bits
LIBC_INLINE static constexpr uint64_t mul_high(uint64_t a, uint64_t b) {
@@ -370,7 +372,9 @@ chunks_exact_mut(typename ChunksMut<uint8_t, buckets_total_>::Chunk &pilots) {
}
}
-} // namespace internal_wctype_conversion_utils
+} // namespace conversion_utils
+
+} // namespace wctype_internal
} // namespace LIBC_NAMESPACE_DECL
More information about the llvm-branch-commits
mailing list