[libc-commits] [libc] [libc] [search] implement hcreate(_r)/hsearch(_r)/hdestroy(_r) (PR #73469)
via libc-commits
libc-commits at lists.llvm.org
Sun Nov 26 16:32:25 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Schrodinger ZHU Yifan (SchrodingerZhu)
<details>
<summary>Changes</summary>
This patch implements `hcreate(_r)/hsearch(_r)/hdestroy(_r)` as specified in https://man7.org/linux/man-pages/man3/hsearch.3.html.
Note that support for the `neon/asimd` extension is not yet added in this patch.
- The implementation is largely simplified from Rust's [`hashbrown`](https://github.com/rust-lang/hashbrown/blob/master/src/raw/mod.rs), as we only consider fixed-size, insertion-only hash tables. Technical details are provided in code comments.
- This patch also contains a portable string hash function, which is derived from [`aHash`](https://github.com/tkaitchuck/aHash)'s fallback routine. Even without any SIMD acceleration, it has good enough quality (it passes all SMHasher tests) and is reasonably fast.
- Some general utilities are added, such as `memory_size`, `offset_to` (alignment), `next_power_of_two`, and `is_power_of_two`. `ctz/clz` are extended to support shorter integer types.
---
Patch is 93.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73469.diff
58 Files Affected:
- (modified) libc/config/linux/aarch64/entrypoints.txt (+8)
- (modified) libc/config/linux/aarch64/headers.txt (+1)
- (modified) libc/config/linux/api.td (+4)
- (modified) libc/config/linux/arm/entrypoints.txt (+8)
- (modified) libc/config/linux/arm/headers.txt (+1)
- (modified) libc/config/linux/riscv/entrypoints.txt (+8)
- (modified) libc/config/linux/riscv/headers.txt (+1)
- (modified) libc/config/linux/x86_64/entrypoints.txt (+8)
- (modified) libc/config/linux/x86_64/headers.txt (+1)
- (modified) libc/include/CMakeLists.txt (+12)
- (added) libc/include/llvm-libc-types/ACTION.h (+14)
- (modified) libc/include/llvm-libc-types/CMakeLists.txt (+3)
- (added) libc/include/llvm-libc-types/ENTRY.h (+17)
- (added) libc/include/llvm-libc-types/struct_hsearch_data.h (+17)
- (added) libc/include/search.h.def (+18)
- (modified) libc/spec/gnu_ext.td (+39-1)
- (modified) libc/spec/posix.td (+34-1)
- (modified) libc/spec/spec.td (+5)
- (modified) libc/src/CMakeLists.txt (+1)
- (modified) libc/src/__support/CMakeLists.txt (+22)
- (added) libc/src/__support/HashTable/CMakeLists.txt (+48)
- (added) libc/src/__support/HashTable/bitmask.h (+91)
- (added) libc/src/__support/HashTable/generic/bitmask_impl.inc (+102)
- (added) libc/src/__support/HashTable/randomness.h (+59)
- (added) libc/src/__support/HashTable/sse2/bitmask_impl.inc (+40)
- (added) libc/src/__support/HashTable/table.h (+229)
- (modified) libc/src/__support/bit.h (+27)
- (added) libc/src/__support/hash.h (+161)
- (added) libc/src/__support/memory_size.h (+72)
- (added) libc/src/search/CMakeLists.txt (+77)
- (added) libc/src/search/hcreate.cpp (+28)
- (added) libc/src/search/hcreate.h (+18)
- (added) libc/src/search/hcreate_r.cpp (+32)
- (added) libc/src/search/hcreate_r.h (+18)
- (added) libc/src/search/hdestroy.cpp (+20)
- (added) libc/src/search/hdestroy.h (+18)
- (added) libc/src/search/hdestroy_r.cpp (+25)
- (added) libc/src/search/hdestroy_r.h (+18)
- (added) libc/src/search/hsearch.cpp (+35)
- (added) libc/src/search/hsearch.h (+18)
- (added) libc/src/search/hsearch/CMakeLists.txt (+7)
- (added) libc/src/search/hsearch/global.cpp (+13)
- (added) libc/src/search/hsearch/global.h (+13)
- (added) libc/src/search/hsearch_r.cpp (+42)
- (added) libc/src/search/hsearch_r.h (+19)
- (modified) libc/test/src/CMakeLists.txt (+1)
- (modified) libc/test/src/__support/CMakeLists.txt (+24)
- (added) libc/test/src/__support/HashTable/CMakeLists.txt (+67)
- (added) libc/test/src/__support/HashTable/bitmask_test.cpp (+69)
- (added) libc/test/src/__support/HashTable/group_test.cpp (+91)
- (added) libc/test/src/__support/HashTable/table_test.cpp (+77)
- (modified) libc/test/src/__support/bit_test.cpp (+17)
- (added) libc/test/src/__support/hash_test.cpp (+135)
- (added) libc/test/src/__support/memory_size_test.cpp (+86)
- (added) libc/test/src/search/CMakeLists.txt (+16)
- (added) libc/test/src/search/hsearch_test.cpp (+124)
- (added) libc/utils/smhasher/smhasher.patch (+205)
- (added) libc/utils/smhasher/smhasher.txt (+1)
``````````diff
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 284feb7b99096ec..ecefa5884adb3eb 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -130,6 +130,14 @@ set(TARGET_LIBC_ENTRYPOINTS
#libc.src.stdio.scanf
#libc.src.stdio.fscanf
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.madvise
libc.src.sys.mman.mmap
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index c47e05c924fd94f..cfca5959b5ffa57 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -12,6 +12,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.sys_mman
libc.include.sys_socket
libc.include.sys_syscall
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 377763b97cfd958..726e58f376eaa76 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -248,3 +248,7 @@ def TermiosAPI : PublicAPI<"termios.h"> {
def SetJmpAPI : PublicAPI<"setjmp.h"> {
let Types = ["jmp_buf"];
}
+
+def SearchAPI : PublicAPI<"search.h"> {
+ let Types = ["ACTION", "ENTRY", "struct hsearch_data"];
+}
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 27c0b8e5b3a3aa2..ee701c04b2e2a8a 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -89,6 +89,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.strtoul
libc.src.stdlib.strtoull
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.mmap
libc.src.sys.mman.munmap
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index fe7c88e922e07e8..bd08d8f8fa437fb 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -7,4 +7,5 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
)
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index a5f0c91e32d0810..1ccb40108bd8507 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -136,6 +136,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdio.scanf
libc.src.stdio.fscanf
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.madvise
libc.src.sys.mman.mmap
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.termios
libc.include.threads
libc.include.time
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 63aa7473115a08e..43266e0e5b66e61 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -497,6 +497,14 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.spawn.posix_spawn_file_actions_destroy
libc.src.spawn.posix_spawn_file_actions_init
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.termios
libc.include.threads
libc.include.time
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 9d170603ffa45cd..429c0f1f12866a8 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -133,6 +133,18 @@ add_gen_header(
.llvm-libc-types.size_t
)
+add_gen_header(
+ search
+ DEF_FILE search.h.def
+ GEN_HDR search.h
+ DEPENDS
+ .llvm_libc_common_h
+ .llvm-libc-types.ACTION
+ .llvm-libc-types.ENTRY
+ .llvm-libc-types.struct_hsearch_data
+ .llvm-libc-types.size_t
+)
+
add_gen_header(
time
DEF_FILE time.h.def
diff --git a/libc/include/llvm-libc-types/ACTION.h b/libc/include/llvm-libc-types/ACTION.h
new file mode 100644
index 000000000000000..7181a59b177d6b6
--- /dev/null
+++ b/libc/include/llvm-libc-types/ACTION.h
@@ -0,0 +1,14 @@
+//===-- Definition of ACTION type -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ACTION_H__
+#define __LLVM_LIBC_TYPES_ACTION_H__
+
+typedef enum { FIND, ENTER } ACTION;
+
+#endif // __LLVM_LIBC_TYPES_ACTION_H__
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 3c0cc7bbc71dacb..225ad780c4d01f2 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -91,3 +91,6 @@ add_header(wint_t HDR wint_t.h)
add_header(sa_family_t HDR sa_family_t.h)
add_header(struct_sockaddr HDR struct_sockaddr.h)
add_header(rpc_opcodes_t HDR rpc_opcodes_t.h)
+add_header(ACTION HDR ACTION.h)
+add_header(ENTRY HDR ENTRY.h)
+add_header(struct_hsearch_data HDR struct_hsearch_data.h)
diff --git a/libc/include/llvm-libc-types/ENTRY.h b/libc/include/llvm-libc-types/ENTRY.h
new file mode 100644
index 000000000000000..0ccb5938207acc8
--- /dev/null
+++ b/libc/include/llvm-libc-types/ENTRY.h
@@ -0,0 +1,17 @@
+//===-- Definition of ENTRY type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ENTRY_H__
+#define __LLVM_LIBC_TYPES_ENTRY_H__
+
+typedef struct {
+ char *key;
+ void *data;
+} ENTRY;
+
+#endif // __LLVM_LIBC_TYPES_ENTRY_H__
diff --git a/libc/include/llvm-libc-types/struct_hsearch_data.h b/libc/include/llvm-libc-types/struct_hsearch_data.h
new file mode 100644
index 000000000000000..7e2a7232fce5358
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_hsearch_data.h
@@ -0,0 +1,17 @@
+//===-- Definition of type struct hsearch_data ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+#define __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+
+struct hsearch_data {
+ void *__opaque;
+ unsigned int __unused[2];
+};
+
+#endif // __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
diff --git a/libc/include/search.h.def b/libc/include/search.h.def
new file mode 100644
index 000000000000000..3435c1f8ad048ea
--- /dev/null
+++ b/libc/include/search.h.def
@@ -0,0 +1,18 @@
+//===-- POSIX header search.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SEARCH_H
+#define LLVM_LIBC_SEARCH_H
+
+#include <__llvm-libc-common.h>
+#define __need_size_t
+#include <stddef.h>
+
+%%public_api()
+
+#endif // LLVM_LIBC_SEARCH_H
diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td
index dfb12419d14005b..cb0407c84d4e212 100644
--- a/libc/spec/gnu_ext.td
+++ b/libc/spec/gnu_ext.td
@@ -3,6 +3,8 @@ def CpuSetPtr : PtrType<CpuSetT>;
def ConstCpuSetPtr : ConstType<CpuSetPtr>;
def QSortRCompareT : NamedType<"__qsortrcompare_t">;
+def StructHsearchData : NamedType<"struct hsearch_data">;
+def StructHsearchDataPtr : PtrType<StructHsearchData>;
def GnuExtensions : StandardSpec<"GNUExtensions"> {
NamedType CookieIOFunctionsT = NamedType<"cookie_io_functions_t">;
@@ -54,7 +56,6 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
>,
]
>;
-
HeaderSpec String = HeaderSpec<
"string.h",
[], // Macros
@@ -89,6 +90,42 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
]
>;
+ HeaderSpec Search = HeaderSpec<
+ "search.h",
+ [], // Macros
+ [
+ StructHsearchData
+ ],
+ [], // Enumerations
+ [
+ FunctionSpec<
+ "hcreate_r",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<SizeTType>,
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ FunctionSpec<
+ "hdestroy_r",
+ RetValSpec<VoidType>,
+ [
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ FunctionSpec<
+ "hsearch_r",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<EntryType>,
+ ArgSpec<ActionType>,
+ ArgSpec<EntryTypePtrPtr>,
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ ]
+ >;
+
HeaderSpec FEnv = HeaderSpec<
"fenv.h",
[], // Macros
@@ -243,6 +280,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
StdIO,
StdLib,
String,
+ Search,
UniStd,
];
}
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index a367cf2a6935c02..c7acf6d25a2d873 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1269,6 +1269,38 @@ def POSIX : StandardSpec<"POSIX"> {
]
>;
+ HeaderSpec Search = HeaderSpec<
+ "search.h",
+ [], // Macros
+ [
+ ActionType,
+ EntryType
+ ], // Types
+ [], // Enumerations
+ [
+ FunctionSpec<
+ "hcreate",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<SizeTType>
+ ]
+ >,
+ FunctionSpec<
+ "hdestroy",
+ RetValSpec<VoidType>,
+ [] // Args
+ >,
+ FunctionSpec<
+ "hsearch",
+ RetValSpec<EntryTypePtr>,
+ [
+ ArgSpec<EntryType>,
+ ArgSpec<ActionType>
+ ]
+ >,
+ ]
+ >;
+
HeaderSpec Termios = HeaderSpec<
"termios.h",
[
@@ -1414,6 +1446,7 @@ def POSIX : StandardSpec<"POSIX"> {
Time,
Termios,
UniStd,
- String
+ String,
+ Search,
];
}
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index b0d5511a4f087ee..9b689b5eb502a9f 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -140,6 +140,11 @@ def SuSecondsT : NamedType<"suseconds_t">;
//added because __assert_fail needs it.
def UnsignedType : NamedType<"unsigned">;
+def ActionType : NamedType<"ACTION">;
+def EntryType : NamedType<"ENTRY">;
+def EntryTypePtr : PtrType<EntryType>;
+def EntryTypePtrPtr : PtrType<EntryTypePtr>;
+
class Macro<string name> {
string Name = name;
}
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 88838eecc53c9a1..3ab62a4f667d260 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -35,3 +35,4 @@ add_subdirectory(signal)
add_subdirectory(spawn)
add_subdirectory(threads)
add_subdirectory(time)
+add_subdirectory(search)
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index b939fae3be791da..cdd9d9dfe55a4d4 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -230,6 +230,26 @@ add_header_library(
libc.src.__support.OSUtil.osutil
)
+add_header_library(
+ hash
+ HDRS
+ hash.h
+ DEPENDS
+ .common
+ .uint128
+)
+
+add_header_library(
+ memory_size
+ HDRS
+ memory_size.h
+ DEPENDS
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.CPP.limits
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.attributes
+)
+
add_subdirectory(FPUtil)
add_subdirectory(OSUtil)
add_subdirectory(StringUtil)
@@ -241,3 +261,5 @@ add_subdirectory(RPC)
add_subdirectory(threads)
add_subdirectory(File)
+
+add_subdirectory(HashTable)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
new file mode 100644
index 000000000000000..ae2fb640141c18c
--- /dev/null
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -0,0 +1,48 @@
+add_header_library(
+ bitmask
+ HDRS
+ bitmask.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.bit
+)
+
+list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
+if (NOT ${getrandom_index} EQUAL -1)
+ message(STATUS "Using getrandom for hashtable randomness")
+ set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM)
+ set(randomness_extra_depends
+ libc.src.sys.random.getrandom libc.src.errno.errno)
+endif()
+
+
+add_header_library(
+ table
+ HDRS
+ table.h
+ DEPENDS
+ .bitmask
+ libc.src.__support.memory_size
+ libc.src.__support.bit
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.optimization
+ libc.src.__support.hash
+ libc.src.string.memset
+ libc.src.string.strcmp
+ libc.src.string.strlen
+ libc.include.stdlib
+ libc.include.llvm-libc-types.ENTRY
+)
+
+add_header_library(
+ randomness
+ HDRS
+ randomness.h
+ DEPENDS
+ libc.src.__support.hash
+ libc.src.__support.common
+ ${randomness_extra_depends}
+ FLAGS
+ ${randomness_compile_flags}
+)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
new file mode 100644
index 000000000000000..4f14c32e1ac25e7
--- /dev/null
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -0,0 +1,91 @@
+//===-- HashTable BitMasks --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+
+#include "src/__support/bit.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint64_t
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+// Implementations of the bitmask.
+// The backend word type may vary depending on different microarchitectures.
+// For example, with X86 SSE2, the bitmask is just the 16bit unsigned integer
+// corresponding to lanes in a SIMD register.
+//
+// Notice that this implementation is simplified from traditional swisstable:
+// since we do not support deletion, we only need to care about if the highest
+// bit is set or not:
+// =============================
+// | Slot Status | Bitmask |
+// =============================
+// | Available | 0b1xxx'xxxx |
+// | Occupied | 0b0xxx'xxxx |
+// =============================
+template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
+ // A masked constant whose bits are all set.
+ constexpr static inline T MASK = WORD_MASK;
+ // A stride in the bitmask may use multiple bits.
+ constexpr static inline size_t STRIDE = WORD_STRIDE;
+
+ T word;
+
+ // Check if any bit is set inside the word.
+ bool any_bit_set() const { return word != 0; }
+
+ // Count trailing zeros with respect to stride. (Assume the bitmask is none
+ // zero.)
+ size_t lowest_set_bit_nonzero() const {
+ return unsafe_ctz<T>(word) / WORD_STRIDE;
+ }
+};
+
+// Not all bitmasks are iterable --- only those who has only MSB set in each
+// lane. Hence, we make the types nomially different to distinguish them.
+template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
+ // Use the bitmask as an iterator. Update the state and return current lowest
+ // set bit. To make the bitmask iterable, each stride must contain 0 or exact
+ // 1 set bit.
+ void remove_lowest_bit() {
+ // Remove the last set bit inside the word:
+ // word = 011110100 (original value)
+ // word - 1 = 011110011 (invert all bits up to the last set bit)
+ // word & (word - 1) = 011110000 (value with the last bit cleared)
+ this->word = this->word & (this->word - 1);
+ }
+ using value_type = size_t;
+ using iterator = BitMask;
+ using const_iterator = BitMask;
+ size_t operator*() const { return this->lowest_set_bit_nonzero(); }
+ IteratableBitMaskAdaptor &operator++() {
+ this->remove_lowest_bit();
+ return *this;
+ }
+ IteratableBitMaskAdaptor begin() { return *this; }
+ IteratableBitMaskAdaptor end() { return {0}; }
+ bool operator==(const IteratableBitMaskAdaptor &other) {
+ return this->word == other.word;
+ }
+ bool operator!=(const IteratableBitMaskAdaptor &other) {
+ return this->word != other.word;
+ }
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+
+#if defined(__SSE2__)
+#include "sse2/bitmask_impl.inc"
+#else
+#include "generic/bitmask_impl.inc"
+#endif
+
+#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc
new file mode 100644
index 000000000000000..24268d963f84b84
--- /dev/null
+++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc
@@ -0,0 +1,102 @@
+//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/endian.h"
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+// Helper function to spread a byte across the whole word.
+// Accumutively, the procedure looks like:
+// byte = 0x00000000000000ff
+// byte | (byte << 8) = 0x000000000000ffff
+// byte | (byte << 16) = 0x00000000ffffffff
+// byte | (byte << 32) = 0xffffffffffffffff
+constexpr static inline uintptr_t repeat_byte(uintptr_t byte) {
+ size_t shift_amount = 8;
+ while (shift_amount < sizeof(uintptr_t) * 8) {
+ byte |= byte << shift_amount;
+ shift_amount <<= 1;
+ }
+ return byte;
+}
+
+using BitMask = BitMaskAdaptor<uintptr_t, repeat_byte(0x80), 0x8ull>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+ uintptr_t data;
+
+ // Load a group of control words from an arbitary address.
+ static Group load(const void *__restrict addr) {
+ union {
+ uintptr_t value;
+ char bytes[sizeof(uintptr_t)];
+ } data;
+ for (size_t i = 0; i < sizeof(uintptr_t); ++i)
+ data.bytes[i] = static_cast<const char *>(addr)[i];
+ return {data.value};
+ }
+
+ // Find out the lanes equal to the given byte and return t...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/73469
More information about the libc-commits
mailing list