[libc-commits] [libc] [libc][wctype] Add perfect hash map for conversion functions (PR #187670)

Muhammad Bassiouni via libc-commits libc-commits at lists.llvm.org
Thu Mar 26 20:57:55 PDT 2026


================
@@ -0,0 +1,877 @@
+//===-- Perfect hash map for conversion functions ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCTYPE_PTR_HASH_H
+#define LLVM_LIBC_SRC___SUPPORT_WCTYPE_PTR_HASH_H
+
+#define LIBC_ENABLE_CONSTEXPR 1
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wint_t.h"
+#include "src/__support/CPP/array.h"
+#include "src/__support/CPP/expected.h"
+#include "src/__support/CPP/monostate.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/span.h"
+#include "src/__support/CPP/string.h"
+#include "src/__support/CPP/tuple.h"
+#include "src/__support/CPP/type_traits/is_unsigned.h"
+#include "src/__support/OSUtil/io.h"
+#include "src/__support/math/ceil.h"
+#include "src/__support/math/log.h"
+#include "src/__support/uint128.h"
+
+#undef LIBC_ENABLE_CONSTEXPR
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace ptrhash {
+
+LIBC_INLINE_VAR constexpr size_t SHARDS = 1; // shard count used when sizing parts
+
+// Small deterministic pseudo-random generator. Every instance starts from the
+// same hard-coded seed, so the produced sequence is reproducible.
+class FastRand {
+public:
+  // The seed choice matters a great deal for different inputs: bad values are
+  // known to cause compilation errors and/or incorrect computations in some
+  // cases. The original implementation defaulted to 0xEF6F79ED30BA75A, which
+  // is not sufficient here. 0x64a727ea04c46a32 is another viable seed.
+  LIBC_INLINE constexpr FastRand() : seed(0xeec13c9f1362aa74) {}
+
+  // Advances the internal state by WY_CONST_0 and returns the XOR of the low
+  // and high 64-bit halves of the 128-bit product state * (state ^ WY_CONST_1).
+  LIBC_INLINE constexpr uint64_t gen() {
+    constexpr uint64_t WY_CONST_0 = 0x2d35'8dcc'aa6c'78a5;
+    constexpr uint64_t WY_CONST_1 = 0x8bb8'4b93'962e'acc9;
+
+    seed = wrapping_add(seed, WY_CONST_0);
+    const UInt128 product =
+        static_cast<UInt128>(seed) * static_cast<UInt128>(seed ^ WY_CONST_1);
+    const uint64_t low_half = static_cast<uint64_t>(product);
+    const uint64_t high_half = static_cast<uint64_t>(product >> 64);
+    return low_half ^ high_half;
+  }
+
+  // Returns the next generated value truncated to its low byte.
+  LIBC_INLINE constexpr uint8_t gen_byte() {
+    const uint64_t next = this->gen();
+    return static_cast<uint8_t>(next);
+  }
+
+private:
+  // Adds `a` and `b` using only bitwise operations: XOR yields the sum without
+  // carries and the shifted AND re-injects the carries until none remain.
+  template <typename T> LIBC_INLINE static constexpr T wrapping_add(T a, T b) {
+    T sum = a;
+    T pending = b;
+    while (pending != 0) {
+      const T carry = sum & pending;
+      sum = sum ^ pending;
+      pending = carry << 1;
+    }
+    return sum;
+  }
+
+private:
+  uint64_t seed;
+};
+
+LIBC_INLINE_VAR constexpr auto BUCKET_IDX_NONE = ~static_cast<uint32_t>(0); // all bits set
+
+// Fixed-capacity max-priority container of (size_t, uint32_t) pairs. Despite
+// the name, elements are stored unordered and the largest one is located with
+// a linear scan; ordering is lexicographic on (first, second).
+template <size_t MaxSize = 5> class BinaryHeap {
+public:
+  LIBC_INLINE constexpr BinaryHeap() = default;
+
+  // Appends `value`. When the container already holds MaxSize elements the
+  // value is silently discarded.
+  LIBC_INLINE constexpr void push(const cpp::tuple<size_t, uint32_t> &value) {
+    if (current_size < MaxSize) {
+      data[current_size] = value;
+      ++current_size;
+    }
+  }
+
+  // Removes and returns the largest element, or a value-initialized tuple when
+  // the container is empty. The vacated position is refilled with the last
+  // stored element.
+  LIBC_INLINE constexpr cpp::tuple<size_t, uint32_t> pop() {
+    if (empty())
+      return {};
+    const size_t largest = index_of_largest();
+    auto result = data[largest];
+    --current_size;
+    data[largest] = data[current_size];
+    return result;
+  }
+
+  // Returns a reference to the largest element without removing it. On an
+  // empty container this refers to the first storage cell.
+  LIBC_INLINE constexpr const cpp::tuple<size_t, uint32_t> &peek() const {
+    return data[index_of_largest()];
+  }
+
+  LIBC_INLINE constexpr bool empty() const { return current_size == 0; }
+
+private:
+  // Index of the largest stored element (0 when nothing is stored). Among
+  // equal elements the earliest one wins because only strictly larger
+  // candidates replace the current best.
+  LIBC_INLINE constexpr size_t index_of_largest() const {
+    size_t best = 0;
+    for (size_t candidate = 1; candidate < current_size; ++candidate) {
+      if (cmp(data[best], data[candidate]))
+        best = candidate;
+    }
+    return best;
+  }
+
+  // Lexicographic "x < y": compare the first components, and fall back to the
+  // second components only when the first ones are equivalent.
+  LIBC_INLINE static constexpr bool cmp(cpp::tuple<size_t, uint32_t> x,
+                                        cpp::tuple<size_t, uint32_t> y) {
+    if (cpp::get<0>(x) < cpp::get<0>(y))
+      return true;
+    if (cpp::get<0>(y) < cpp::get<0>(x))
+      return false;
+    return cpp::get<1>(x) < cpp::get<1>(y);
+  }
+
+private:
+  cpp::array<cpp::tuple<size_t, uint32_t>, MaxSize> data{};
+  size_t current_size{};
+};
+
+// Returns true when `x` has exactly one bit set. Zero is not a power of two.
+// Only unsigned types are accepted.
+template <typename T> LIBC_INLINE constexpr bool is_power_of_two(T x) {
+  static_assert(cpp::is_unsigned_v<T>,
+                "is_power_of_two requires unsigned type");
+  if (x == 0)
+    return false;
+  // Clearing the lowest set bit leaves zero only if it was the sole bit.
+  return (x & (x - 1)) == 0;
+}
+
+// Computes the number of parts used for `n` keys.
+//
+// The target is x / log(x) with x = n * eps^2 / 2 and eps = 0.01 / 2, rounded
+// towards zero, then rounded down to a multiple of SHARDS with a minimum of
+// one part per shard.
+//
+// Converting a floating-point value to size_t is undefined when the truncated
+// value is not representable (negative, infinite or too large). x / log(x) is
+// negative for x < 1 and unbounded as x approaches 1, so the conversion is
+// guarded: non-positive or undefined estimates yield 0 target parts and
+// oversized estimates saturate.
+LIBC_INLINE constexpr size_t get_parts(size_t n) {
+  constexpr double EPS = 0.01 / 2.0;
+  constexpr size_t SIZE_LIMIT = ~static_cast<size_t>(0);
+
+  const double x = static_cast<double>(n) * EPS * EPS / 2.0;
+
+  size_t target_parts = 0;
+  if (x > 1.0) {
+    // log(x) > 0 here, so the quotient is finite and positive.
+    const double estimate = x / math::log(x);
+    target_parts = (estimate < static_cast<double>(SIZE_LIMIT))
+                       ? static_cast<size_t>(estimate)
+                       : SIZE_LIMIT;
+  }
+
+  const size_t parts_per_shard = target_parts / SHARDS;
+  return ((parts_per_shard > 1) ? parts_per_shard : 1) * SHARDS;
+}
+
+// Returns the number of slots per part: keys_per_part divided by 0.99 and
+// truncated, bumped by one if that result is a power of two.
+LIBC_INLINE constexpr size_t get_slots_per_part(size_t keys_per_part) {
+  const double scaled = static_cast<double>(keys_per_part) / 0.99;
+  const size_t slots = static_cast<size_t>(scaled);
+  return is_power_of_two(slots) ? slots + 1 : slots;
+}
+
+// Compile-time table dimensions derived from the number of keys `n`.
+template <size_t n> class PtrhashConfig {
+public:
+  // Partitioning of the key set.
+  LIBC_INLINE_VAR static constexpr size_t PARTS = get_parts(n);
+  LIBC_INLINE_VAR static constexpr size_t PARTS_PER_SHARD = PARTS / SHARDS;
+  LIBC_INLINE_VAR static constexpr size_t KEYS_PER_PART = n / PARTS;
+
+  // Per-part capacities.
+  LIBC_INLINE_VAR static constexpr size_t SLOTS_PER_PART =
+      get_slots_per_part(KEYS_PER_PART);
+  LIBC_INLINE_VAR static constexpr size_t BUCKETS_PER_PART =
+      static_cast<size_t>(math::ceil(KEYS_PER_PART / 3.0) + 3);
+
+  // Totals across all parts.
+  LIBC_INLINE_VAR static constexpr size_t SLOTS_TOTAL = SLOTS_PER_PART * PARTS;
+  LIBC_INLINE_VAR static constexpr size_t BUCKETS_TOTAL =
+      BUCKETS_PER_PART * PARTS;
+};
+
+template <size_t n_, size_t parts_, size_t parts_per_shard_,
+          size_t slots_total_, size_t buckets_total_, size_t slots_,
+          size_t buckets_, typename Key = uint64_t,
+          typename F = cpp::array<uint32_t, slots_total_ - n_>,
+          typename PilotsTypeV = cpp::array<uint8_t, buckets_total_>>
+class PtrHash {
+public:
+  static_assert(
+      cpp::is_same_v<PilotsTypeV, cpp::span<uint8_t>> ||
+          cpp::is_same_v<PilotsTypeV, cpp::array<uint8_t, buckets_total_>>,
+      "V must be a byte slice or byte vector");
+
+  uint64_t seed;
+  PilotsTypeV pilots;
+  F remap;
+
+  LIBC_INLINE constexpr PtrHash(uint64_t seed_, PilotsTypeV pilots_, F remap_)
+      : seed(seed_), pilots(pilots_), remap(remap_) {} // stores the arguments as given
+
+  LIBC_INLINE constexpr PtrHash(const PtrHash &) = default;
+  LIBC_INLINE constexpr PtrHash(PtrHash &&) = default;
+
+  LIBC_INLINE constexpr PtrHash &operator=(const PtrHash &) = default;
+  LIBC_INLINE constexpr PtrHash &operator=(PtrHash &&) = default;
+
+  // Maps `key` to its final index. Slots below n_ are used directly; slots at
+  // or beyond n_ are redirected through the remap table.
+  LIBC_INLINE constexpr size_t index(Key key) const {
+    const auto raw_slot = this->index_no_remap(key);
+
+    if (raw_slot >= n_)
+      return this->remap[raw_slot - n_];
+
+    return raw_slot;
+  }
+
+  // Maps `key` to its slot without applying the remap table: hash the key,
+  // look up the pilot of the key's bucket, and derive the slot from both.
+  LIBC_INLINE constexpr size_t index_no_remap(Key key) const {
+    const auto key_hash = this->hash_key(key);
+    const auto bucket_pilot = this->pilots[this->bucket(key_hash)];
+    return this->slot(key_hash, bucket_pilot);
+  }
+
+  // Global slot of hash `hx` under `pilot`: the start of the hash's part plus
+  // the slot position inside that part.
+  LIBC_INLINE constexpr size_t slot(uint64_t hx, uint64_t pilot) const {
+    const auto part_offset = this->part(hx) * slots_;
+    return part_offset + this->slot_in_part(hx, pilot);
+  }
+
+  // Slot position of hash `hx` inside its part; the pilot is hashed first and
+  // the result is forwarded to slot_in_part_hp.
+  LIBC_INLINE constexpr size_t slot_in_part(uint64_t hx, uint64_t pilot) const {
+    const auto hashed_pilot = this->hash_pilot(pilot);
+    return this->slot_in_part_hp(hx, hashed_pilot);
+  }
+
+  // Searches for a seed and pilot assignment that places all `keys` without
+  // collisions.
+  //
+  // Up to MAX_TRIES hard-coded seeds are tried in order. For each seed the
+  // keys are hashed, sorted into parts, every shard is built, and finally the
+  // free slots are remapped. The first seed for which all of these steps
+  // succeed wins: `seed`, `pilots` and `remap` of this object are updated and
+  // returned as a tuple. An empty optional is returned when every seed fails.
+  LIBC_INLINE constexpr cpp::optional<cpp::tuple<uint64_t, PilotsTypeV, F>>
+  compute_pilots(const cpp::array<Key, n_> &keys) {
+    // One occupancy flag per slot, grouped by part. Cleared at the start of
+    // every attempt below.
+    cpp::array<cpp::array<bool, slots_>, parts_> taken{};
+    PilotsTypeV pilots{};
+
+    constexpr size_t MAX_TRIES = 10;
+
+    // hard code random numbers for the generator to make it simpler
+    constexpr uint64_t STDRNG[MAX_TRIES] = {
+        0x1a275d28e2768536, 0x72737b411117ac11, 0xeb08f8fcd423148f,
+        0x1d6f85975d49be9e, 0xf03250d1c097577,  0xac6e884d8db1fa90,
+        0x4415d98c0c03a79f, 0xa36bfbcfddf4d5e6, 0x154aef1f436d8e98,
+        0xd21f78471475f18e};
+
+    for (size_t attempt = 0; attempt < MAX_TRIES; ++attempt) {
+      this->seed = STDRNG[attempt];
+      pilots = PilotsTypeV{0};
+
+      // Each per-part occupancy array holds exactly slots_ flags, so that is
+      // the bound to clear against (not the number of pilots).
+      for (auto &part_taken : taken)
+        for (size_t s = 0; s < slots_; ++s)
+          part_taken[s] = false;
+
+      auto shard_hashes = this->shards(keys);
+
+      const size_t pilots_chunk_size =
+          cpp::max(buckets_ * parts_per_shard_, static_cast<size_t>(1));
+      const size_t taken_chunk_size = parts_per_shard_;
+      const size_t num_pilots_chunks =
+          (pilots.size() + pilots_chunk_size - 1) / pilots_chunk_size;
+      const size_t num_taken_chunks =
+          (taken.size() + taken_chunk_size - 1) / taken_chunk_size;
+      const size_t num_shards =
+          cpp::min(shard_hashes.size(),
+                   cpp::min(num_pilots_chunks, num_taken_chunks));
+
+      bool all_shards_built = true;
+      for (size_t shard = 0; shard < num_shards; shard++) {
+        cpp::array<uint64_t, n_> hashes = shard_hashes[shard];
+
+        // Range of pilots and of occupancy arrays owned by this shard.
+        size_t pilots_begin = shard * pilots_chunk_size;
+        size_t pilots_end =
+            cpp::min(pilots_begin + pilots_chunk_size, pilots.size());
+
+        size_t taken_begin = shard * taken_chunk_size;
+        size_t taken_end =
+            cpp::min(taken_begin + taken_chunk_size, taken.size());
+
+        cpp::optional<cpp::tuple<cpp::array<uint64_t, n_>,
+                                 cpp::array<uint32_t, parts_per_shard_ + 1>>>
+            sorted_parts = this->sort_parts(shard, hashes);
+        if (!sorted_parts) {
+          all_shards_built = false;
+          break;
+        }
+
+        auto &[new_hashes, part_starts] = sorted_parts.value();
+
+        if (!this->build_shard(shard, new_hashes, part_starts, pilots,
+                               pilots_begin, pilots_end, taken_begin, taken_end,
+                               taken)) {
+          all_shards_built = false;
+          break;
+        }
+      }
+
+      // Any failure means this seed is unusable; move on to the next one.
+      if (!all_shards_built)
+        continue;
+
+      if (!this->remap_free_slots(taken))
+        continue;
+
+      this->pilots = pilots;
+      return {{this->seed, this->pilots, this->remap}};
+    }
+
+    return {};
+  }
+
+  // Fills the remap table from the occupancy flags in `taken`.
+  //
+  // First verifies that exactly slots_total_ - n_ slots are free; otherwise a
+  // diagnostic is written to stderr and an error is returned. Then every free
+  // slot below n_ is recorded as the target of the next occupied slot at or
+  // beyond n_. Free slots at or beyond n_ that are passed over on the way
+  // receive the same target as filler.
+  LIBC_INLINE constexpr cpp::expected<cpp::monostate, cpp::nullopt_t>
+  remap_free_slots(cpp::array<cpp::array<bool, slots_>, parts_> &taken) {
+    size_t free_count = 0;
+    for (size_t part = 0; part < taken.size(); ++part) {
+      for (auto occupied : taken[part]) {
+        if (!occupied)
+          ++free_count;
+      }
+    }
+
+    if (free_count != slots_total_ - n_) {
+      write_to_stderr("Not the right number of free slots left!\n");
+      write_to_stderr(" total slots ");
+      write_to_stderr(cpp::to_string(slots_total_));
+      write_to_stderr(" - n ");
+      write_to_stderr(cpp::to_string(n_));
+      write_to_stderr("\n");
+      return cpp::unexpected(cpp::nullopt);
+    }
+
+    // Nothing to remap when the table has no surplus slots.
+    if (slots_total_ == n_)
+      return cpp::monostate{};
+
+    cpp::array<uint64_t, slots_total_ - n_> targets{};
+    size_t filled = 0;
+
+    for (size_t part = 0; part < taken.size(); ++part) {
+      const auto part_base = part * slots_;
+      const auto &part_taken = taken[part];
+      for (size_t local = 0; local < part_taken.size(); ++local) {
+        if (part_taken[local])
+          continue;
+        const auto free_slot = part_base + local;
+        if (free_slot >= n_)
+          continue;
+        // Skip over surplus slots that are themselves free, giving them the
+        // current target as a placeholder, then assign the real entry.
+        while (!taken[(n_ + filled) / slots_][(n_ + filled) % slots_])
+          targets[filled++] = free_slot;
+        targets[filled++] = free_slot;
+      }
+    }
+
+    for (size_t i = 0; i < targets.size(); ++i)
+      this->remap[i] = static_cast<uint32_t>(targets[i]);
+
+    return cpp::monostate{};
+  }
+
+  LIBC_INLINE constexpr auto shards(const cpp::array<Key, n_> &keys) const {
+    return this->no_sharding(keys); // always a single shard with every key hash
+  }
+
+  // Hashes every key with hash_key and returns the hashes, in key order, as
+  // the single element of a one-shard array.
+  LIBC_INLINE constexpr cpp::array<cpp::array<uint64_t, n_>, 1>
+  no_sharding(const cpp::array<Key, n_> &keys) const {
+    // Value-initialize: a variable without an initializer is not permitted in
+    // a constexpr function before C++20.
+    cpp::array<uint64_t, n_> hashes{};
+    for (size_t i = 0; i < keys.size(); i++) {
+      hashes[i] = this->hash_key(keys[i]);
+    }
+    return {hashes};
+  }
+
+  // Hashes a single key: a zero state is rotated left by 5 bits, XOR-ed with
+  // the key, multiplied by a fixed 64-bit constant, and finally XOR-ed with
+  // the current seed.
+  LIBC_INLINE constexpr uint64_t hash_key(Key x) const {
+    constexpr uint64_t WORD_BITS = sizeof(uint64_t) * 8;
+    constexpr uint64_t ROTATION = 5;
+    constexpr uint64_t MULTIPLIER = 0x517cc1b727220a95;
+
+    const uint64_t initial = 0;
+    const uint64_t rotated =
+        (initial << ROTATION) | (initial >> (WORD_BITS - ROTATION));
+    const uint64_t keyed = rotated ^ x;
+    const uint64_t mixed = keyed * MULTIPLIER;
+    return mixed ^ this->seed;
+  }
+
+  // Sorts the hashes of one shard and computes where each part begins.
+  //
+  // Returns the sorted hashes together with part_starts, where
+  // part_starts[i] is the index of the first hash belonging to part i of the
+  // shard (part_starts[0] is 0). Returns nullopt when two hashes are equal
+  // (after reporting it on stderr) or when some part holds more hashes than
+  // there are slots in a part.
+  LIBC_INLINE constexpr cpp::optional<cpp::tuple<
+      cpp::array<uint64_t, n_>, cpp::array<uint32_t, parts_per_shard_ + 1>>>
+  sort_parts(size_t shard, cpp::array<uint64_t, n_> hashes) const {
+    // Ascending in-place selection sort.
+    for (size_t lo = 0; lo < hashes.size(); ++lo) {
+      size_t smallest = lo;
+      for (size_t probe = lo + 1; probe < hashes.size(); ++probe) {
+        if (hashes[probe] < hashes[smallest])
+          smallest = probe;
+      }
+      if (smallest != lo) {
+        const auto displaced = hashes[lo];
+        hashes[lo] = hashes[smallest];
+        hashes[smallest] = displaced;
+      }
+    }
+
+    // Once sorted, duplicates can only sit next to each other.
+    for (size_t i = 1; i < hashes.size(); ++i) {
+      if (hashes[i - 1] == hashes[i]) {
+        write_to_stderr("Hashes are not distinct\n");
+        return cpp::nullopt;
+      }
+    }
+
+    const size_t first_part = shard * parts_per_shard_;
+
+    if (!hashes.empty()) {
+      LIBC_ASSERT(shard * parts_per_shard_ <= this->part(hashes[0]));
+      LIBC_ASSERT(this->part(hashes[hashes.size() - 1]) <
+                  (shard + 1) * parts_per_shard_);
+    }
+
+    cpp::array<uint32_t, parts_per_shard_ + 1> part_starts{};
+
+    // Binary-search the first hash whose part is not below each part index.
+    for (size_t part_in_shard = 1; part_in_shard <= parts_per_shard_;
+         ++part_in_shard) {
+      auto first_in_part = cpp::lower_bound(
+          hashes.begin(), hashes.end(), first_part + part_in_shard,
+          [&](uint64_t hash, uint64_t wanted_part) {
+            return this->part(hash) < wanted_part;
+          });
+
+      part_starts[part_in_shard] = static_cast<uint32_t>(
+          cpp::distance(hashes.begin(), first_in_part));
+    }
+
+    // Reject the layout if any part would overflow its slots.
+    size_t longest_part = 0;
+    for (size_t i = 1; i < part_starts.size(); ++i) {
+      const size_t part_len = part_starts[i] - part_starts[i - 1];
+      if (part_len > longest_part)
+        longest_part = part_len;
+    }
+
+    if (longest_part > slots_)
+      return cpp::nullopt;
+
+    return cpp::tuple<cpp::array<uint64_t, n_>,
+                      cpp::array<uint32_t, parts_per_shard_ + 1>>{hashes,
+                                                                  part_starts};
+  }
+
+  // Builds every part of one shard.
+  //
+  // `hashes` are the shard's sorted hashes and `part_starts` the start index
+  // of each part within them. The shard owns pilots[pilots_begin, pilots_end)
+  // and taken[taken_begin, taken_end). Part i of the shard is built from its
+  // own hash range, its own chunk of buckets_ pilots, and its own occupancy
+  // array; the chunks and occupancy arrays are walked pairwise, so each part
+  // is built exactly once. Iteration stops as soon as either sequence runs
+  // out of complete elements.
+  //
+  // Returns false as soon as build_part fails for a part, true otherwise.
+  LIBC_INLINE constexpr bool
+  build_shard(size_t shard, cpp::array<uint64_t, n_> &hashes,
+              cpp::array<uint32_t, parts_per_shard_ + 1> &part_starts,
+              PilotsTypeV &pilots, size_t pilots_begin, size_t pilots_end,
+              size_t taken_begin, size_t taken_end,
+              cpp::array<cpp::array<bool, slots_>, parts_> &taken) const {
+
+    size_t part_in_shard = 0;
+    for (size_t taken_idx = taken_begin; taken_idx < taken_end;
+         ++taken_idx, ++part_in_shard) {
+      const size_t chunk_begin = pilots_begin + part_in_shard * buckets_;
+      // Only complete pilot chunks are usable.
+      if (chunk_begin + buckets_ > pilots_end)
+        break;
+
+      const size_t part = shard * parts_per_shard_ + part_in_shard;
+
+      auto total_evictions = this->build_part(
+          part,
+          cpp::span<uint64_t>(hashes).subspan(part_starts[part_in_shard],
+                                              part_starts[part_in_shard + 1] -
+                                                  part_starts[part_in_shard]),
+          cpp::span<uint8_t>(
+              const_cast<uint8_t *>(pilots.data() + chunk_begin), buckets_),
+          taken[taken_idx]);
+      if (!total_evictions) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  LIBC_INLINE constexpr cpp::optional<size_t>
+  build_part(size_t part, cpp::span<uint64_t> hashes, cpp::span<uint8_t> pilots,
+             cpp::array<bool, slots_> &taken) const {
+    cpp::tuple<cpp::array<uint32_t, buckets_ + 1>,
+               cpp::array<uint32_t, buckets_>>
+        sorted_buckets = this->sort_buckets(part, hashes);
+    cpp::array<uint32_t, buckets_ + 1> starts = cpp::get<0>(sorted_buckets);
+    cpp::array<uint32_t, buckets_> bucket_order = cpp::get<1>(sorted_buckets);
+
+    auto kmax = 256u;
+
+    cpp::array<uint32_t, slots_> slots{};
+    for (size_t i = 0; i < slots_; i++) {
+      slots[i] = BUCKET_IDX_NONE;
+    }
+
+    auto bucket_len = [&](uint32_t b) constexpr -> size_t {
+      return starts[b + 1] - starts[b];
+    };
+
+    auto heap = BinaryHeap<>();
+
+    auto duplicate_slots = [&](uint32_t b, uint64_t p) constexpr {
+      auto hp = this->hash_pilot(p);
+      auto hashes_range = hashes.subspan(starts[b], starts[b + 1] - starts[b]);
+
+      auto i = 0;
+      for (auto const &e1 : hashes_range) {
+        auto hx = this->slot_in_part_hp(e1, hp);
+        for (auto e2 : hashes_range.subspan(i + 1)) {
+          auto hy = this->slot_in_part_hp(e2, hp);
+          if (hx == hy) {
+            return true;
+          }
+        }
+        i++;
+      }
+      return false;
+    };
+
+    cpp::array<uint32_t, 16> recent{
+        {BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+         BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+         BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE,
+         BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE, BUCKET_IDX_NONE}};
+    size_t total_evictions = 0;
+
+    auto rng = FastRand();
+
+    for (size_t iter_num = 0; iter_num < bucket_order.size(); iter_num++) {
+      auto const &new_b = bucket_order[iter_num];
+      auto const new_bucket =
+          hashes.subspan(starts[new_b], starts[new_b + 1] - starts[new_b]);
+      if (new_bucket.empty()) {
+        pilots[new_b] = 0;
+        continue;
+      }
+      auto const new_b_len = new_bucket.size();
+      size_t evictions = 0;
+
+      heap.push({new_b_len, new_b});
+      for (size_t i = 0; i < recent.size(); ++i) {
+        recent[i] = BUCKET_IDX_NONE;
+      }
+      auto recent_idx = 0;
+      recent[0] = new_b;
+
+      while (!heap.empty()) {
----------------
bassiounix wrote:

I tried to separate them into other functions, but because of the dependent nature of the loops I can't separate them; doing so causes a crash in Cling/`cppyy` because of its internal behavior when evaluating/interpreting the code.

https://github.com/llvm/llvm-project/pull/187670


More information about the libc-commits mailing list