[libc-commits] [libc] [libc][wctype] Add perfect hash map for conversion functions (PR #187670)
Muhammad Bassiouni via libc-commits
libc-commits at lists.llvm.org
Thu Mar 26 20:57:55 PDT 2026
================
@@ -0,0 +1,877 @@
+//===-- Perfect hash map for conversion functions ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCTYPE_PTR_HASH_H
+#define LLVM_LIBC_SRC___SUPPORT_WCTYPE_PTR_HASH_H
+
+#define LIBC_ENABLE_CONSTEXPR 1
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wint_t.h"
+#include "src/__support/CPP/array.h"
+#include "src/__support/CPP/expected.h"
+#include "src/__support/CPP/monostate.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/span.h"
+#include "src/__support/CPP/string.h"
+#include "src/__support/CPP/tuple.h"
+#include "src/__support/CPP/type_traits/is_unsigned.h"
+#include "src/__support/OSUtil/io.h"
+#include "src/__support/math/ceil.h"
+#include "src/__support/math/log.h"
+#include "src/__support/uint128.h"
+
+#undef LIBC_ENABLE_CONSTEXPR
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace ptrhash {
+
+LIBC_INLINE_VAR constexpr size_t SHARDS = 1;
+
+// Small deterministic 64-bit pseudo-random generator. Every call to gen()
+// advances the internal state by a fixed increment and mixes it through a
+// 64x64 -> 128 bit multiplication.
+class FastRand {
+public:
+  // The seed matters a great deal: poorly chosen values are known to cause
+  // compilation errors and/or incorrect computations for some inputs. The
+  // original implementation defaulted to 0xEF6F79ED30BA75A, which is not
+  // sufficient here. 0x64a727ea04c46a32 is another seed known to work.
+  LIBC_INLINE constexpr FastRand() : seed(0xeec13c9f1362aa74) {}
+
+  // Returns the next generated value truncated to its low 8 bits.
+  LIBC_INLINE constexpr uint8_t gen_byte() {
+    const uint64_t next = this->gen();
+    return static_cast<uint8_t>(next);
+  }
+
+  // Advances the state and returns the next 64-bit value: the XOR of the low
+  // and high halves of state * (state ^ MIX).
+  LIBC_INLINE constexpr uint64_t gen() {
+    constexpr uint64_t INCREMENT = 0x2d35'8dcc'aa6c'78a5;
+    constexpr uint64_t MIX = 0x8bb8'4b93'962e'acc9;
+
+    seed = wrapping_add(seed, INCREMENT);
+
+    const UInt128 lhs = static_cast<UInt128>(seed);
+    const UInt128 rhs = static_cast<UInt128>(seed ^ MIX);
+    const UInt128 product = lhs * rhs;
+
+    const uint64_t low = static_cast<uint64_t>(product);
+    const uint64_t high = static_cast<uint64_t>(product >> 64);
+    return low ^ high;
+  }
+
+private:
+  // Adds `a` and `b` using only bitwise operations, propagating the carry
+  // until none is left.
+  template <typename T> LIBC_INLINE static constexpr T wrapping_add(T a, T b) {
+    T sum = a;
+    T pending = b;
+    while (pending != 0) {
+      const T carry = sum & pending;
+      sum ^= pending;
+      pending = carry << 1;
+    }
+    return sum;
+  }
+
+private:
+  uint64_t seed;
+};
+
+LIBC_INLINE_VAR constexpr auto BUCKET_IDX_NONE = ~static_cast<uint32_t>(0);
+
+// Fixed-capacity max-priority container of (size_t, uint32_t) pairs.
+//
+// Despite the name, elements live in a flat array and the maximum is located
+// with a linear scan. Elements are ordered by the first tuple field, then by
+// the second. push() silently drops the value once MaxSize elements are
+// stored.
+template <size_t MaxSize = 5> class BinaryHeap {
+public:
+  LIBC_INLINE constexpr BinaryHeap() = default;
+
+  // Appends `value`; does nothing when the container is already full.
+  LIBC_INLINE constexpr void push(const cpp::tuple<size_t, uint32_t> &value) {
+    if (current_size < MaxSize) {
+      data[current_size] = value;
+      ++current_size;
+    }
+  }
+
+  // Removes and returns the largest element, or a value-initialized tuple
+  // when the container is empty.
+  LIBC_INLINE constexpr cpp::tuple<size_t, uint32_t> pop() {
+    if (empty())
+      return {};
+    const size_t best = index_of_max();
+    auto top = data[best];
+    // Fill the hole with the last stored element and shrink.
+    --current_size;
+    data[best] = data[current_size];
+    return top;
+  }
+
+  // Returns the largest element without removing it. On an empty container
+  // this returns the first storage cell.
+  LIBC_INLINE constexpr const cpp::tuple<size_t, uint32_t> &peek() const {
+    return data[index_of_max()];
+  }
+
+  LIBC_INLINE constexpr bool empty() const { return current_size == 0; }
+
+private:
+  // Index of the largest stored element; 0 when nothing is stored. Among
+  // equal elements the earliest one wins.
+  LIBC_INLINE constexpr size_t index_of_max() const {
+    size_t best = 0;
+    for (size_t candidate = 1; candidate < current_size; ++candidate) {
+      if (cmp(data[best], data[candidate]))
+        best = candidate;
+    }
+    return best;
+  }
+
+  // Strict "x orders before y": compare the first fields, and fall back to
+  // the second fields when the first are equal.
+  LIBC_INLINE static constexpr bool cmp(cpp::tuple<size_t, uint32_t> x,
+                                        cpp::tuple<size_t, uint32_t> y) {
+    if (cpp::get<0>(x) != cpp::get<0>(y))
+      return cpp::get<0>(x) < cpp::get<0>(y);
+    return cpp::get<1>(x) < cpp::get<1>(y);
+  }
+
+private:
+  cpp::array<cpp::tuple<size_t, uint32_t>, MaxSize> data{};
+  size_t current_size{};
+};
+
+// Returns true when the unsigned value `x` has exactly one bit set.
+// Zero is not a power of two.
+template <typename T> LIBC_INLINE constexpr bool is_power_of_two(T x) {
+  static_assert(cpp::is_unsigned_v<T>,
+                "is_power_of_two requires unsigned type");
+  if (x == 0)
+    return false;
+  // Clearing the lowest set bit leaves nothing only for a single-bit value.
+  return (x & (x - 1)) == 0;
+}
+
+// Returns the number of parts used for `n` keys: floor(x / ln(x)) with
+// x = n * eps^2 / 2 and eps = 0.01 / 2, rounded down to a multiple of SHARDS
+// and never less than SHARDS.
+LIBC_INLINE constexpr size_t get_parts(size_t n) {
+  constexpr double EPS = 0.01 / 2.0;
+  const double x = static_cast<double>(n) * EPS * EPS / 2.0;
+
+  // For x <= 1 the quotient x / ln(x) is negative, infinite or NaN.
+  // Converting such a value to size_t is undefined (and a hard error during
+  // constant evaluation) unless it truncates to zero, so use the minimum
+  // part count directly. Inputs that were previously well defined in this
+  // range already produced SHARDS.
+  if (!(x > 1.0))
+    return SHARDS;
+
+  const auto target_parts = static_cast<size_t>(x / math::log(x));
+  const size_t parts_per_shard = target_parts / SHARDS;
+  return ((parts_per_shard > 1) ? parts_per_shard : 1) * SHARDS;
+}
+
+// Returns the number of slots per part for `keys_per_part` keys: the key
+// count divided by 0.99 (truncated), bumped by one whenever that result is a
+// power of two.
+LIBC_INLINE constexpr size_t get_slots_per_part(size_t keys_per_part) {
+  const double scaled = static_cast<double>(keys_per_part) / 0.99;
+  const size_t slots = static_cast<size_t>(scaled);
+  return is_power_of_two(slots) ? slots + 1 : slots;
+}
+
+// Compile-time sizing constants derived from the number of keys `n`.
+template <size_t n> class PtrhashConfig {
+public:
+  // Number of parts the keys are split into.
+  LIBC_INLINE_VAR static constexpr size_t PARTS = get_parts(n);
+  LIBC_INLINE_VAR static constexpr size_t PARTS_PER_SHARD = PARTS / SHARDS;
+
+  // Keys assigned to each part (integer division).
+  LIBC_INLINE_VAR static constexpr size_t KEYS_PER_PART = n / PARTS;
+
+  // Slot counts, per part and overall.
+  LIBC_INLINE_VAR static constexpr size_t SLOTS_PER_PART =
+      get_slots_per_part(KEYS_PER_PART);
+  LIBC_INLINE_VAR static constexpr size_t SLOTS_TOTAL = PARTS * SLOTS_PER_PART;
+
+  // Bucket counts: one bucket per three keys (rounded up) plus three, per
+  // part.
+  LIBC_INLINE_VAR static constexpr size_t BUCKETS_PER_PART =
+      static_cast<size_t>(math::ceil(KEYS_PER_PART / 3.0) + 3);
+  LIBC_INLINE_VAR static constexpr size_t BUCKETS_TOTAL =
+      PARTS * BUCKETS_PER_PART;
+};
+
+template <size_t n_, size_t parts_, size_t parts_per_shard_,
+ size_t slots_total_, size_t buckets_total_, size_t slots_,
+ size_t buckets_, typename Key = uint64_t,
+ typename F = cpp::array<uint32_t, slots_total_ - n_>,
+ typename PilotsTypeV = cpp::array<uint8_t, buckets_total_>>
+class PtrHash {
+public:
+ static_assert(
+ cpp::is_same_v<PilotsTypeV, cpp::span<uint8_t>> ||
+ cpp::is_same_v<PilotsTypeV, cpp::array<uint8_t, buckets_total_>>,
+ "V must be a byte slice or byte vector");
+
+ uint64_t seed;
+ PilotsTypeV pilots;
+ F remap;
+
+ LIBC_INLINE constexpr PtrHash(uint64_t seed_, PilotsTypeV pilots_, F remap_)
+ : seed(seed_), pilots(pilots_), remap(remap_) {}
+
+ LIBC_INLINE constexpr PtrHash(const PtrHash &) = default;
+ LIBC_INLINE constexpr PtrHash(PtrHash &&) = default;
+
+ LIBC_INLINE constexpr PtrHash &operator=(const PtrHash &) = default;
+ LIBC_INLINE constexpr PtrHash &operator=(PtrHash &&) = default;
+
+ // Returns the index for `key`. A raw slot below n_ is used as is; any other
+ // slot is redirected through the `remap` table.
+ LIBC_INLINE constexpr size_t index(Key key) const {
+   const auto raw_slot = this->index_no_remap(key);
+   return (raw_slot < n_) ? raw_slot : this->remap[raw_slot - n_];
+ }
+
+ // Returns the raw slot for `key`: hash the key, look up the pilot stored for
+ // the key's bucket, and combine hash and pilot into a slot. No remapping is
+ // applied.
+ LIBC_INLINE constexpr size_t index_no_remap(Key key) const {
+   const auto hx = this->hash_key(key);
+   const auto bucket_idx = this->bucket(hx);
+   return this->slot(hx, this->pilots[bucket_idx]);
+ }
+
+ // Returns the global slot for hash `hx` under `pilot`: the start of the
+ // hash's part (part index * slots_) plus the slot inside that part.
+ LIBC_INLINE constexpr size_t slot(uint64_t hx, uint64_t pilot) const {
+   const auto part_base = this->part(hx) * slots_;
+   return part_base + this->slot_in_part(hx, pilot);
+ }
+
+ // Returns the slot of hash `hx` inside its part for the given `pilot`, by
+ // hashing the pilot first and delegating to slot_in_part_hp.
+ LIBC_INLINE constexpr size_t slot_in_part(uint64_t hx, uint64_t pilot) const {
+   const auto hp = this->hash_pilot(pilot);
+   return this->slot_in_part_hp(hx, hp);
+ }
+
+ // Searches for a seed and a pilot table that place all `keys` without
+ // collisions, then fills the remap table for slots at or beyond n_.
+ //
+ // Up to MAX_TRIES hard-coded seeds are tried in order. A try is abandoned as
+ // soon as sorting the hashes into parts, building a shard, or remapping the
+ // free slots fails. Returns (seed, pilots, remap) on success and an empty
+ // optional once every seed has failed. Updates this->seed, this->pilots and
+ // (through remap_free_slots) this->remap.
+ LIBC_INLINE constexpr cpp::optional<cpp::tuple<uint64_t, PilotsTypeV, F>>
+ compute_pilots(const cpp::array<Key, n_> &keys) {
+   // Value-initialized here; cleared again at the start of every try below.
+   cpp::array<cpp::array<bool, slots_>, parts_> taken{};
+   PilotsTypeV pilots{};
+
+   size_t tries = 0;
+   constexpr size_t MAX_TRIES = 10;
+
+   // hard code random numbers for the generator to make it simpler
+   constexpr uint64_t STDRNG[MAX_TRIES] = {
+       0x1a275d28e2768536, 0x72737b411117ac11, 0xeb08f8fcd423148f,
+       0x1d6f85975d49be9e, 0xf03250d1c097577,  0xac6e884d8db1fa90,
+       0x4415d98c0c03a79f, 0xa36bfbcfddf4d5e6, 0x154aef1f436d8e98,
+       0xd21f78471475f18e};
+
+   while (true) {
+     bool contd = false;
+     tries += 1;
+     if (tries > MAX_TRIES) {
+       return {};
+     }
+
+     this->seed = STDRNG[tries - 1];
+     pilots = PilotsTypeV{0};
+
+     // Forget the slots claimed by a previous, failed try. The bound must be
+     // the size of each per-part slot array (slots_), not the number of
+     // pilots: using the latter leaves stale entries behind or writes out of
+     // bounds whenever the two sizes differ.
+     for (auto &t : taken)
+       for (size_t e = 0; e < t.size(); e++)
+         t[e] = false;
+
+     auto shard_hashes = this->shards(keys);
+
+     const size_t pilots_chunk_size =
+         cpp::max(buckets_ * parts_per_shard_, static_cast<size_t>(1));
+     const size_t taken_chunk_size = parts_per_shard_;
+     const size_t num_pilots_chunks =
+         (pilots.size() + pilots_chunk_size - 1) / pilots_chunk_size;
+     const size_t num_taken_chunks =
+         (taken.size() + taken_chunk_size - 1) / taken_chunk_size;
+
+     for (size_t shard = 0;
+          shard < cpp::min(shard_hashes.size(),
+                           cpp::min(num_pilots_chunks, num_taken_chunks));
+          shard++) {
+       cpp::array<uint64_t, n_> hashes = shard_hashes[shard];
+
+       // Half-open ranges of the pilots and `taken` entries owned by this
+       // shard.
+       size_t pilots_begin = shard * pilots_chunk_size;
+       size_t pilots_end =
+           cpp::min(pilots_begin + pilots_chunk_size, pilots.size());
+
+       size_t taken_begin = shard * taken_chunk_size;
+       size_t taken_end =
+           cpp::min(taken_begin + taken_chunk_size, taken.size());
+
+       cpp::optional<cpp::tuple<cpp::array<uint64_t, n_>,
+                                cpp::array<uint32_t, parts_per_shard_ + 1>>>
+           sorted_parts = this->sort_parts(shard, hashes);
+       if (!sorted_parts) {
+         contd = true;
+         break;
+       }
+
+       auto &[new_hashes, part_starts] = sorted_parts.value();
+
+       if (!this->build_shard(shard, new_hashes, part_starts, pilots,
+                              pilots_begin, pilots_end, taken_begin, taken_end,
+                              taken)) {
+         contd = true;
+         break;
+       }
+     }
+     if (contd) {
+       continue;
+     }
+
+     auto const remap = this->remap_free_slots(taken);
+
+     if (!remap) {
+       continue;
+     }
+     break;
+   }
+   this->pilots = pilots;
+
+   return {{this->seed, this->pilots, this->remap}};
+ }
+
+ // Fills this->remap so that every slot index at or beyond n_ is redirected
+ // to a free slot below n_.
+ //
+ // `taken` holds, per part, which slots are occupied. The number of free
+ // slots must equal slots_total_ - n_; otherwise a diagnostic is written to
+ // stderr and an error is returned. Returns cpp::monostate on success.
+ LIBC_INLINE constexpr cpp::expected<cpp::monostate, cpp::nullopt_t>
+ remap_free_slots(cpp::array<cpp::array<bool, slots_>, parts_> &taken) {
+   // Count every slot that no key occupies.
+   size_t free_total = 0;
+   for (const auto &part_taken : taken)
+     for (const bool used : part_taken)
+       if (!used)
+         ++free_total;
+
+   if (free_total != slots_total_ - n_) {
+     write_to_stderr("Not the right number of free slots left!\n");
+     write_to_stderr(" total slots ");
+     write_to_stderr(cpp::to_string(slots_total_));
+     write_to_stderr(" - n ");
+     write_to_stderr(cpp::to_string(n_));
+     write_to_stderr("\n");
+     return cpp::unexpected(cpp::nullopt);
+   }
+
+   // Nothing to remap when there are exactly as many slots as keys.
+   if (slots_total_ == n_)
+     return cpp::monostate{};
+
+   // Staging buffer: entry k is the target for slot n_ + k. Entries that are
+   // never written stay zero.
+   cpp::array<uint64_t, slots_total_ - n_> staged{};
+   size_t staged_len = 0;
+
+   // Looks up a global slot index in the per-part `taken` arrays.
+   auto const is_taken = [&](size_t global_idx) {
+     return taken[global_idx / slots_][global_idx % slots_];
+   };
+
+   for (size_t p = 0; p < taken.size(); ++p) {
+     const size_t offset = p * slots_;
+     for (size_t idx = 0; idx < taken[p].size(); ++idx) {
+       if (taken[p][idx])
+         continue;
+       const size_t free_slot = offset + idx;
+       if (free_slot >= n_)
+         continue;
+       // Positions at or beyond n_ that are themselves free are never looked
+       // up, so pad them with the current value until an occupied one is
+       // reached, then assign that occupied position as well.
+       while (!is_taken(n_ + staged_len))
+         staged[staged_len++] = free_slot;
+       staged[staged_len++] = free_slot;
+     }
+   }
+
+   for (size_t i = 0; i < staged.size(); ++i)
+     this->remap[i] = static_cast<uint32_t>(staged[i]);
+
+   return cpp::monostate{};
+ }
+
+ // Returns the hashed keys grouped by shard. Sharding is not used here, so
+ // this forwards to no_sharding().
+ LIBC_INLINE constexpr auto shards(const cpp::array<Key, n_> &keys) const {
+   auto hashed = this->no_sharding(keys);
+   return hashed;
+ }
+
+ // Hashes every key with hash_key() and returns the hashes as a single
+ // shard, in the same order as `keys`.
+ LIBC_INLINE constexpr cpp::array<cpp::array<uint64_t, n_>, 1>
+ no_sharding(const cpp::array<Key, n_> &keys) const {
+   // Value-initialize: a local without an initializer is not permitted in a
+   // constexpr function before C++20.
+   cpp::array<uint64_t, n_> ret{};
+   for (size_t i = 0; i < keys.size(); i++) {
+     ret[i] = this->hash_key(keys[i]);
+   }
+   return {ret};
+ }
+
+ // Hashes key `x`: one rotate-xor-multiply round applied to a zero initial
+ // state, followed by an XOR with the current seed.
+ LIBC_INLINE constexpr uint64_t hash_key(Key x) const {
+   constexpr uint64_t BITS = sizeof(uint64_t) * 8;
+   constexpr uint64_t ROTATE = 5;
+   constexpr uint64_t MULTIPLIER = 0x517cc1b727220a95;
+
+   const uint64_t state = 0;
+   const uint64_t rotated = (state << ROTATE) | (state >> (BITS - ROTATE));
+   const uint64_t keyed = rotated ^ x;
+   const uint64_t mixed = keyed * MULTIPLIER;
+   return mixed ^ this->seed;
+ }
+
+ // Sorts the hashes of one shard and computes where each part begins.
+ //
+ // Returns the sorted hashes together with `part_starts`, where entry i is
+ // the index of the first hash whose part is not below
+ // shard * parts_per_shard_ + i (entry 0 is always 0). Returns nullopt when
+ // two hashes are equal (after writing a message to stderr) or when a part
+ // holds more hashes than there are slots per part.
+ LIBC_INLINE constexpr cpp::optional<cpp::tuple<
+     cpp::array<uint64_t, n_>, cpp::array<uint32_t, parts_per_shard_ + 1>>>
+ sort_parts(size_t shard, cpp::array<uint64_t, n_> hashes) const {
+   // Ascending in-place selection sort.
+   for (size_t i = 0; i + 1 < hashes.size(); ++i) {
+     size_t smallest = i;
+     for (size_t j = i + 1; j < hashes.size(); ++j) {
+       if (hashes[j] < hashes[smallest])
+         smallest = j;
+     }
+     if (smallest != i) {
+       const uint64_t tmp = hashes[i];
+       hashes[i] = hashes[smallest];
+       hashes[smallest] = tmp;
+     }
+   }
+
+   // Once sorted, duplicates are adjacent.
+   for (size_t i = 1; i < hashes.size(); ++i) {
+     if (hashes[i - 1] == hashes[i]) {
+       write_to_stderr("Hashes are not distinct\n");
+       return cpp::nullopt;
+     }
+   }
+
+   // Every hash must belong to one of the parts of this shard.
+   if (!hashes.empty()) {
+     LIBC_ASSERT(shard * parts_per_shard_ <= this->part(hashes[0]));
+     LIBC_ASSERT(this->part(hashes[hashes.size() - 1]) <
+                 (shard + 1) * parts_per_shard_);
+   }
+
+   // part_starts[0] keeps its value-initialized 0; the remaining boundaries
+   // are found by searching the sorted hashes by part number.
+   cpp::array<uint32_t, parts_per_shard_ + 1> part_starts{};
+   for (size_t boundary = 1; boundary <= parts_per_shard_; ++boundary) {
+     const auto first_in_part = cpp::lower_bound(
+         hashes.begin(), hashes.end(), shard * parts_per_shard_ + boundary,
+         [&](uint64_t h, uint64_t k) { return this->part(h) < k; });
+     part_starts[boundary] =
+         static_cast<uint32_t>(cpp::distance(hashes.begin(), first_in_part));
+   }
+
+   // Give up if any part received more hashes than it has slots.
+   for (size_t i = 0; i + 1 < part_starts.size(); ++i) {
+     const size_t len = part_starts[i + 1] - part_starts[i];
+     if (len > slots_)
+       return cpp::nullopt;
+   }
+
+   return cpp::tuple<cpp::array<uint64_t, n_>,
+                     cpp::array<uint32_t, parts_per_shard_ + 1>>{hashes,
+                                                                 part_starts};
+ }
+
+ // Builds every part of one shard.
+ //
+ // `hashes` are the shard's sorted hashes and `part_starts` the boundaries
+ // between its parts. [pilots_begin, pilots_end) is the range of `pilots`
+ // owned by the shard, split into consecutive chunks of buckets_ entries, and
+ // [taken_begin, taken_end) is the range of `taken` entries owned by it.
+ //
+ // Part i of the shard is built exactly once, using pilots chunk i and
+ // taken[taken_begin + i]. Pairing every `taken` entry with every pilots
+ // chunk would rebuild the same part against pilot ranges that belong to
+ // other parts whenever a shard has more than one part.
+ //
+ // Returns false as soon as build_part fails for a part, true otherwise.
+ LIBC_INLINE constexpr bool
+ build_shard(size_t shard, cpp::array<uint64_t, n_> &hashes,
+             cpp::array<uint32_t, parts_per_shard_ + 1> &part_starts,
+             PilotsTypeV &pilots, size_t pilots_begin, size_t pilots_end,
+             size_t taken_begin, size_t taken_end,
+             cpp::array<cpp::array<bool, slots_>, parts_> &taken) const {
+   // Only whole pilots chunks are used.
+   const size_t num_pilot_chunks = (pilots_end - pilots_begin) / buckets_;
+   const size_t num_taken = taken_end - taken_begin;
+   const size_t num_parts = cpp::min(num_pilot_chunks, num_taken);
+
+   for (size_t part_in_shard = 0; part_in_shard < num_parts;
+        ++part_in_shard) {
+     const size_t part = shard * parts_per_shard_ + part_in_shard;
+     const size_t part_pilots_begin = pilots_begin + part_in_shard * buckets_;
+     const size_t hashes_begin = part_starts[part_in_shard];
+     const size_t hashes_len =
+         part_starts[part_in_shard + 1] - part_starts[part_in_shard];
+
+     auto total_evictions = this->build_part(
+         part, cpp::span<uint64_t>(hashes).subspan(hashes_begin, hashes_len),
+         cpp::span<uint8_t>(
+             const_cast<uint8_t *>(pilots.data() + part_pilots_begin),
+             buckets_),
+         taken[taken_begin + part_in_shard]);
+     if (!total_evictions) {
+       return false;
+     }
+   }
+
+   return true;
+ }
+
+ LIBC_INLINE constexpr cpp::optional<size_t>
+ build_part(size_t part, cpp::span<uint64_t> hashes, cpp::span<uint8_t> pilots,
+ cpp::array<bool, slots_> &taken) const {
+ cpp::tuple<cpp::array<uint32_t, buckets_ + 1>,
+ cpp::array<uint32_t, buckets_>>
+ sorted_buckets = this->sort_buckets(part, hashes);
+ cpp::array<uint32_t, buckets_ + 1> starts = cpp::get<0>(sorted_buckets);
+ cpp::array<uint32_t, buckets_> bucket_order = cpp::get<1>(sorted_buckets);
+
+ auto kmax = 256u;
+
+ cpp::array<uint32_t, slots_> slots{};
+ for (size_t i = 0; i < slots_; i++) {
+ slots[i] = BUCKET_IDX_NONE;
+ }
+
+ auto bucket_len = [&](uint32_t b) constexpr -> size_t {
+ return starts[b + 1] - starts[b];
+ };
+
+ auto heap = BinaryHeap<>();
+
+ auto duplicate_slots = [&](uint32_t b, uint64_t p) constexpr {
+ auto hp = this->hash_pilot(p);
+ auto hashes_range = hashes.subspan(starts[b], starts[b + 1] - starts[b]);
+
+ auto i = 0;
+ for (auto const &e1 : hashes_range) {
+ auto hx = this->slot_in_part_hp(e1, hp);
+ for (auto e2 : hashes_range.subspan(i + 1)) {
+ auto hy = this->slot_in_part_hp(e2, hp);
+ if (hx == hy) {
+ return true;
+ }
+ }
+ i++;
+ }
+ return false;
+ };
+
+ cpp::array<uint32_t, 16> recent{
+ {BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+ BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+ BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+ BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE}};
+ size_t total_evictions = 0;
+
+ auto rng = FastRand();
+
+ for (size_t iter_num = 0; iter_num < bucket_order.size(); iter_num++) {
+ auto const &new_b = bucket_order[iter_num];
+ auto const new_bucket =
+ hashes.subspan(starts[new_b], starts[new_b + 1] - starts[new_b]);
+ if (new_bucket.empty()) {
+ pilots[new_b] = 0;
+ continue;
+ }
+ auto const new_b_len = new_bucket.size();
+ size_t evictions = 0;
+
+ heap.push({new_b_len, new_b});
+ for (size_t i = 0; i < recent.size(); ++i) {
+ recent[i] = BUCKET_IDX_NONE;
+ }
+ auto recent_idx = 0;
+ recent[0] = new_b;
+
+ while (!heap.empty()) {
----------------
bassiounix wrote:
I tried to separate them into other functions, but because of the dependent nature of the loops I can't: doing so causes a crash in Cling/`cppyy` because of its internal behavior when evaluating/interpreting the code.
https://github.com/llvm/llvm-project/pull/187670
More information about the libc-commits
mailing list