[libc-commits] [libc] [libc] [search] implement hcreate(_r)/hsearch(_r)/hdestroy(_r) (PR #73469)

via libc-commits libc-commits at lists.llvm.org
Sun Nov 26 16:32:25 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Schrodinger ZHU Yifan (SchrodingerZhu)

<details>
<summary>Changes</summary>

This patch implements `hcreate(_r)/hsearch(_r)/hdestroy(_r)` as specified in https://man7.org/linux/man-pages/man3/hsearch.3.html.

Note that support for the `neon/asimd` extension is not yet added in this patch.

- The implementation is largely simplified from Rust's [`hashbrown`](https://github.com/rust-lang/hashbrown/blob/master/src/raw/mod.rs), as we only consider fixed-size, insertion-only hash tables. Technical details are provided in code comments.

- This patch also contains a portable string hash function, which is derived from [`aHash`](https://github.com/tkaitchuck/aHash)'s fallback routine. Although it does not use any SIMD acceleration, its quality is good enough (it passes all SMHasher tests) and its speed is not too bad.

- Some general-purpose utilities are added, such as `memory_size`, `offset_to` (alignment), `next_power_of_two`, and `is_power_of_two`. `ctz/clz` are extended to support shorter integers.

---

Patch is 93.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73469.diff


58 Files Affected:

- (modified) libc/config/linux/aarch64/entrypoints.txt (+8) 
- (modified) libc/config/linux/aarch64/headers.txt (+1) 
- (modified) libc/config/linux/api.td (+4) 
- (modified) libc/config/linux/arm/entrypoints.txt (+8) 
- (modified) libc/config/linux/arm/headers.txt (+1) 
- (modified) libc/config/linux/riscv/entrypoints.txt (+8) 
- (modified) libc/config/linux/riscv/headers.txt (+1) 
- (modified) libc/config/linux/x86_64/entrypoints.txt (+8) 
- (modified) libc/config/linux/x86_64/headers.txt (+1) 
- (modified) libc/include/CMakeLists.txt (+12) 
- (added) libc/include/llvm-libc-types/ACTION.h (+14) 
- (modified) libc/include/llvm-libc-types/CMakeLists.txt (+3) 
- (added) libc/include/llvm-libc-types/ENTRY.h (+17) 
- (added) libc/include/llvm-libc-types/struct_hsearch_data.h (+17) 
- (added) libc/include/search.h.def (+18) 
- (modified) libc/spec/gnu_ext.td (+39-1) 
- (modified) libc/spec/posix.td (+34-1) 
- (modified) libc/spec/spec.td (+5) 
- (modified) libc/src/CMakeLists.txt (+1) 
- (modified) libc/src/__support/CMakeLists.txt (+22) 
- (added) libc/src/__support/HashTable/CMakeLists.txt (+48) 
- (added) libc/src/__support/HashTable/bitmask.h (+91) 
- (added) libc/src/__support/HashTable/generic/bitmask_impl.inc (+102) 
- (added) libc/src/__support/HashTable/randomness.h (+59) 
- (added) libc/src/__support/HashTable/sse2/bitmask_impl.inc (+40) 
- (added) libc/src/__support/HashTable/table.h (+229) 
- (modified) libc/src/__support/bit.h (+27) 
- (added) libc/src/__support/hash.h (+161) 
- (added) libc/src/__support/memory_size.h (+72) 
- (added) libc/src/search/CMakeLists.txt (+77) 
- (added) libc/src/search/hcreate.cpp (+28) 
- (added) libc/src/search/hcreate.h (+18) 
- (added) libc/src/search/hcreate_r.cpp (+32) 
- (added) libc/src/search/hcreate_r.h (+18) 
- (added) libc/src/search/hdestroy.cpp (+20) 
- (added) libc/src/search/hdestroy.h (+18) 
- (added) libc/src/search/hdestroy_r.cpp (+25) 
- (added) libc/src/search/hdestroy_r.h (+18) 
- (added) libc/src/search/hsearch.cpp (+35) 
- (added) libc/src/search/hsearch.h (+18) 
- (added) libc/src/search/hsearch/CMakeLists.txt (+7) 
- (added) libc/src/search/hsearch/global.cpp (+13) 
- (added) libc/src/search/hsearch/global.h (+13) 
- (added) libc/src/search/hsearch_r.cpp (+42) 
- (added) libc/src/search/hsearch_r.h (+19) 
- (modified) libc/test/src/CMakeLists.txt (+1) 
- (modified) libc/test/src/__support/CMakeLists.txt (+24) 
- (added) libc/test/src/__support/HashTable/CMakeLists.txt (+67) 
- (added) libc/test/src/__support/HashTable/bitmask_test.cpp (+69) 
- (added) libc/test/src/__support/HashTable/group_test.cpp (+91) 
- (added) libc/test/src/__support/HashTable/table_test.cpp (+77) 
- (modified) libc/test/src/__support/bit_test.cpp (+17) 
- (added) libc/test/src/__support/hash_test.cpp (+135) 
- (added) libc/test/src/__support/memory_size_test.cpp (+86) 
- (added) libc/test/src/search/CMakeLists.txt (+16) 
- (added) libc/test/src/search/hsearch_test.cpp (+124) 
- (added) libc/utils/smhasher/smhasher.patch (+205) 
- (added) libc/utils/smhasher/smhasher.txt (+1) 


``````````diff
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 284feb7b99096ec..ecefa5884adb3eb 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -130,6 +130,14 @@ set(TARGET_LIBC_ENTRYPOINTS
     #libc.src.stdio.scanf
     #libc.src.stdio.fscanf
 
+    # search.h entrypoints
+    libc.src.search.hcreate
+    libc.src.search.hcreate_r
+    libc.src.search.hsearch
+    libc.src.search.hsearch_r
+    libc.src.search.hdestroy
+    libc.src.search.hdestroy_r
+
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
     libc.src.sys.mman.mmap
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index c47e05c924fd94f..cfca5959b5ffa57 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -12,6 +12,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.stdlib
     libc.include.string
     libc.include.strings
+    libc.include.search
     libc.include.sys_mman
     libc.include.sys_socket
     libc.include.sys_syscall
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 377763b97cfd958..726e58f376eaa76 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -248,3 +248,7 @@ def TermiosAPI : PublicAPI<"termios.h"> {
 def SetJmpAPI : PublicAPI<"setjmp.h"> {
   let Types = ["jmp_buf"];
 }
+
+def SearchAPI : PublicAPI<"search.h"> {
+  let Types = ["ACTION", "ENTRY", "struct hsearch_data"];
+}
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 27c0b8e5b3a3aa2..ee701c04b2e2a8a 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -89,6 +89,14 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.strtoul
     libc.src.stdlib.strtoull
 
+    # search.h entrypoints
+    libc.src.search.hcreate
+    libc.src.search.hcreate_r
+    libc.src.search.hsearch
+    libc.src.search.hsearch_r
+    libc.src.search.hdestroy
+    libc.src.search.hdestroy_r
+
     # sys/mman.h entrypoints
     libc.src.sys.mman.mmap
     libc.src.sys.mman.munmap
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index fe7c88e922e07e8..bd08d8f8fa437fb 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -7,4 +7,5 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.stdlib
     libc.include.string
     libc.include.strings
+    libc.include.search
 )
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index a5f0c91e32d0810..1ccb40108bd8507 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -136,6 +136,14 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.scanf
     libc.src.stdio.fscanf
 
+    # search.h entrypoints
+    libc.src.search.hcreate
+    libc.src.search.hcreate_r
+    libc.src.search.hsearch
+    libc.src.search.hsearch_r
+    libc.src.search.hdestroy
+    libc.src.search.hdestroy_r
+
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
     libc.src.sys.mman.mmap
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.stdlib
     libc.include.string
     libc.include.strings
+    libc.include.search
     libc.include.termios
     libc.include.threads
     libc.include.time
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 63aa7473115a08e..43266e0e5b66e61 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -497,6 +497,14 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.spawn.posix_spawn_file_actions_destroy
     libc.src.spawn.posix_spawn_file_actions_init
 
+    # search.h entrypoints
+    libc.src.search.hcreate
+    libc.src.search.hcreate_r
+    libc.src.search.hsearch
+    libc.src.search.hsearch_r
+    libc.src.search.hdestroy
+    libc.src.search.hdestroy_r
+
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.stdlib
     libc.include.string
     libc.include.strings
+    libc.include.search
     libc.include.termios
     libc.include.threads
     libc.include.time
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 9d170603ffa45cd..429c0f1f12866a8 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -133,6 +133,18 @@ add_gen_header(
     .llvm-libc-types.size_t
 )
 
+add_gen_header(
+  search
+  DEF_FILE search.h.def
+  GEN_HDR search.h
+  DEPENDS
+    .llvm_libc_common_h
+    .llvm-libc-types.ACTION
+    .llvm-libc-types.ENTRY
+    .llvm-libc-types.struct_hsearch_data
+    .llvm-libc-types.size_t
+)
+
 add_gen_header(
   time
   DEF_FILE time.h.def
diff --git a/libc/include/llvm-libc-types/ACTION.h b/libc/include/llvm-libc-types/ACTION.h
new file mode 100644
index 000000000000000..7181a59b177d6b6
--- /dev/null
+++ b/libc/include/llvm-libc-types/ACTION.h
@@ -0,0 +1,14 @@
+//===-- Definition of ACTION type -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ACTION_H__
+#define __LLVM_LIBC_TYPES_ACTION_H__
+
+typedef enum { FIND, ENTER } ACTION;
+
+#endif // __LLVM_LIBC_TYPES_ACTION_H__
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 3c0cc7bbc71dacb..225ad780c4d01f2 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -91,3 +91,6 @@ add_header(wint_t HDR wint_t.h)
 add_header(sa_family_t HDR sa_family_t.h)
 add_header(struct_sockaddr HDR struct_sockaddr.h)
 add_header(rpc_opcodes_t HDR rpc_opcodes_t.h)
+add_header(ACTION HDR ACTION.h)
+add_header(ENTRY HDR ENTRY.h)
+add_header(struct_hsearch_data HDR struct_hsearch_data.h)
diff --git a/libc/include/llvm-libc-types/ENTRY.h b/libc/include/llvm-libc-types/ENTRY.h
new file mode 100644
index 000000000000000..0ccb5938207acc8
--- /dev/null
+++ b/libc/include/llvm-libc-types/ENTRY.h
@@ -0,0 +1,17 @@
+//===-- Definition of ENTRY type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ENTRY_H__
+#define __LLVM_LIBC_TYPES_ENTRY_H__
+
+typedef struct {
+  char *key;
+  void *data;
+} ENTRY;
+
+#endif // __LLVM_LIBC_TYPES_ENTRY_H__
diff --git a/libc/include/llvm-libc-types/struct_hsearch_data.h b/libc/include/llvm-libc-types/struct_hsearch_data.h
new file mode 100644
index 000000000000000..7e2a7232fce5358
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_hsearch_data.h
@@ -0,0 +1,17 @@
+//===-- Definition of type struct hsearch_data ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+#define __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+
+struct hsearch_data {
+  void *__opaque;
+  unsigned int __unused[2];
+};
+
+#endif // __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
diff --git a/libc/include/search.h.def b/libc/include/search.h.def
new file mode 100644
index 000000000000000..3435c1f8ad048ea
--- /dev/null
+++ b/libc/include/search.h.def
@@ -0,0 +1,18 @@
+//===-- POSIX header search.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SEARCH_H
+#define LLVM_LIBC_SEARCH_H
+
+#include <__llvm-libc-common.h>
+#define __need_size_t
+#include <stddef.h>
+
+%%public_api()
+
+#endif // LLVM_LIBC_SEARCH_H
diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td
index dfb12419d14005b..cb0407c84d4e212 100644
--- a/libc/spec/gnu_ext.td
+++ b/libc/spec/gnu_ext.td
@@ -3,6 +3,8 @@ def CpuSetPtr : PtrType<CpuSetT>;
 def ConstCpuSetPtr : ConstType<CpuSetPtr>;
 
 def QSortRCompareT : NamedType<"__qsortrcompare_t">;
+def StructHsearchData : NamedType<"struct hsearch_data">;
+def StructHsearchDataPtr : PtrType<StructHsearchData>;
 
 def GnuExtensions : StandardSpec<"GNUExtensions"> {
   NamedType CookieIOFunctionsT = NamedType<"cookie_io_functions_t">;
@@ -54,7 +56,6 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
         >,
       ]
   >;
-
   HeaderSpec String = HeaderSpec<
       "string.h",
       [], // Macros
@@ -89,6 +90,42 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
       ]
   >;
 
+  HeaderSpec Search = HeaderSpec<
+    "search.h",
+    [], // Macros
+    [
+        StructHsearchData
+    ],
+    [], // Enumerations
+    [
+        FunctionSpec<
+            "hcreate_r",
+            RetValSpec<IntType>,
+            [  
+                ArgSpec<SizeTType>, 
+                ArgSpec<StructHsearchDataPtr>
+            ]
+        >,
+        FunctionSpec<
+            "hdestroy_r",
+            RetValSpec<VoidType>,
+            [
+                ArgSpec<StructHsearchDataPtr>
+            ]
+        >,
+        FunctionSpec<
+            "hsearch_r",
+            RetValSpec<IntType>,
+            [
+                ArgSpec<EntryType>, 
+                ArgSpec<ActionType>,
+                ArgSpec<EntryTypePtrPtr>,
+                ArgSpec<StructHsearchDataPtr>
+            ]
+        >,
+    ]
+  >;
+
   HeaderSpec FEnv = HeaderSpec<
       "fenv.h",
       [], // Macros
@@ -243,6 +280,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
     StdIO,
     StdLib,
     String,
+    Search,
     UniStd,
   ];
 }
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index a367cf2a6935c02..c7acf6d25a2d873 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1269,6 +1269,38 @@ def POSIX : StandardSpec<"POSIX"> {
     ]
   >;
 
+  HeaderSpec Search = HeaderSpec<
+    "search.h",
+    [], // Macros
+    [
+        ActionType,
+        EntryType
+    ], // Types
+    [], // Enumerations
+    [
+        FunctionSpec<
+            "hcreate",
+            RetValSpec<IntType>,
+            [
+                ArgSpec<SizeTType>
+            ]
+        >,
+        FunctionSpec<
+            "hdestroy",
+            RetValSpec<VoidType>,
+            [] // Args
+        >,
+        FunctionSpec<
+            "hsearch",
+            RetValSpec<EntryTypePtr>,
+            [
+                ArgSpec<EntryType>, 
+                ArgSpec<ActionType>
+            ]
+        >,
+    ]
+  >; 
+
   HeaderSpec Termios = HeaderSpec<
     "termios.h",
     [
@@ -1414,6 +1446,7 @@ def POSIX : StandardSpec<"POSIX"> {
     Time,
     Termios,
     UniStd,
-    String
+    String,
+    Search,
   ];
 }
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index b0d5511a4f087ee..9b689b5eb502a9f 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -140,6 +140,11 @@ def SuSecondsT : NamedType<"suseconds_t">;
 //added because __assert_fail needs it.
 def UnsignedType : NamedType<"unsigned">;
 
+def ActionType : NamedType<"ACTION">;
+def EntryType : NamedType<"ENTRY">;
+def EntryTypePtr : PtrType<EntryType>;
+def EntryTypePtrPtr : PtrType<EntryTypePtr>;
+
 class Macro<string name> {
   string Name = name;
 }
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 88838eecc53c9a1..3ab62a4f667d260 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -35,3 +35,4 @@ add_subdirectory(signal)
 add_subdirectory(spawn)
 add_subdirectory(threads)
 add_subdirectory(time)
+add_subdirectory(search)
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index b939fae3be791da..cdd9d9dfe55a4d4 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -230,6 +230,26 @@ add_header_library(
     libc.src.__support.OSUtil.osutil
 )
 
+add_header_library(
+  hash
+  HDRS
+    hash.h
+  DEPENDS
+    .common
+    .uint128
+)
+
+add_header_library(
+  memory_size
+  HDRS
+    memory_size.h
+  DEPENDS
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.CPP.limits
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.attributes
+)
+
 add_subdirectory(FPUtil)
 add_subdirectory(OSUtil)
 add_subdirectory(StringUtil)
@@ -241,3 +261,5 @@ add_subdirectory(RPC)
 add_subdirectory(threads)
 
 add_subdirectory(File)
+
+add_subdirectory(HashTable)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
new file mode 100644
index 000000000000000..ae2fb640141c18c
--- /dev/null
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -0,0 +1,48 @@
+add_header_library(
+  bitmask
+  HDRS
+    bitmask.h
+  DEPENDS
+    libc.src.__support.common
+    libc.src.__support.bit
+)
+
+list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
+if (NOT ${getrandom_index} EQUAL -1)
+  message(STATUS "Using getrandom for hashtable randomness")
+  set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM)
+  set(randomness_extra_depends 
+    libc.src.sys.random.getrandom libc.src.errno.errno)
+endif()
+
+
+add_header_library(
+  table
+  HDRS
+    table.h
+  DEPENDS
+    .bitmask
+    libc.src.__support.memory_size
+    libc.src.__support.bit
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.macros.attributes
+    libc.src.__support.macros.optimization
+    libc.src.__support.hash
+    libc.src.string.memset
+    libc.src.string.strcmp
+    libc.src.string.strlen
+    libc.include.stdlib
+    libc.include.llvm-libc-types.ENTRY
+)
+
+add_header_library(
+  randomness
+  HDRS
+    randomness.h
+  DEPENDS
+    libc.src.__support.hash
+    libc.src.__support.common
+    ${randomness_extra_depends}
+  FLAGS
+    ${randomness_compile_flags}
+)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
new file mode 100644
index 000000000000000..4f14c32e1ac25e7
--- /dev/null
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -0,0 +1,91 @@
+//===-- HashTable BitMasks --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+
+#include "src/__support/bit.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint64_t
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+// Implementations of the bitmask.
+// The backend word type may vary depending on different microarchitectures.
+// For example, with X86 SSE2, the bitmask is just the 16bit unsigned integer
+// corresponding to lanes in a SIMD register.
+//
+// Notice that this implementation is simplified from traditional swisstable:
+// since we do not support deletion, we only need to care about if the highest
+// bit is set or not:
+// =============================
+// | Slot Status |   Bitmask   |
+// =============================
+// |  Available  | 0b1xxx'xxxx |
+// |  Occupied   | 0b0xxx'xxxx |
+// =============================
+template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
+  // A masked constant whose bits are all set.
+  constexpr static inline T MASK = WORD_MASK;
+  // A stride in the bitmask may use multiple bits.
+  constexpr static inline size_t STRIDE = WORD_STRIDE;
+
+  T word;
+
+  // Check if any bit is set inside the word.
+  bool any_bit_set() const { return word != 0; }
+
+  // Count trailing zeros with respect to stride. (Assume the bitmask is none
+  // zero.)
+  size_t lowest_set_bit_nonzero() const {
+    return unsafe_ctz<T>(word) / WORD_STRIDE;
+  }
+};
+
+// Not all bitmasks are iterable --- only those who has only MSB set in each
+// lane. Hence, we make the types nomially different to distinguish them.
+template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
+  // Use the bitmask as an iterator. Update the state and return current lowest
+  // set bit. To make the bitmask iterable, each stride must contain 0 or exact
+  // 1 set bit.
+  void remove_lowest_bit() {
+    // Remove the last set bit inside the word:
+    //    word              = 011110100 (original value)
+    //    word - 1          = 011110011 (invert all bits up to the last set bit)
+    //    word & (word - 1) = 011110000 (value with the last bit cleared)
+    this->word = this->word & (this->word - 1);
+  }
+  using value_type = size_t;
+  using iterator = BitMask;
+  using const_iterator = BitMask;
+  size_t operator*() const { return this->lowest_set_bit_nonzero(); }
+  IteratableBitMaskAdaptor &operator++() {
+    this->remove_lowest_bit();
+    return *this;
+  }
+  IteratableBitMaskAdaptor begin() { return *this; }
+  IteratableBitMaskAdaptor end() { return {0}; }
+  bool operator==(const IteratableBitMaskAdaptor &other) {
+    return this->word == other.word;
+  }
+  bool operator!=(const IteratableBitMaskAdaptor &other) {
+    return this->word != other.word;
+  }
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+
+#if defined(__SSE2__)
+#include "sse2/bitmask_impl.inc"
+#else
+#include "generic/bitmask_impl.inc"
+#endif
+
+#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc
new file mode 100644
index 000000000000000..24268d963f84b84
--- /dev/null
+++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc
@@ -0,0 +1,102 @@
+//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/endian.h"
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+// Helper function to spread a byte across the whole word.
+// Accumutively, the procedure looks like:
+//    byte                  = 0x00000000000000ff
+//    byte | (byte << 8)    = 0x000000000000ffff
+//    byte | (byte << 16)   = 0x00000000ffffffff
+//    byte | (byte << 32)   = 0xffffffffffffffff
+constexpr static inline uintptr_t repeat_byte(uintptr_t byte) {
+  size_t shift_amount = 8;
+  while (shift_amount < sizeof(uintptr_t) * 8) {
+    byte |= byte << shift_amount;
+    shift_amount <<= 1;
+  }
+  return byte;
+}
+
+using BitMask = BitMaskAdaptor<uintptr_t, repeat_byte(0x80), 0x8ull>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+  uintptr_t data;
+
+  // Load a group of control words from an arbitary address.
+  static Group load(const void *__restrict addr) {
+    union {
+      uintptr_t value;
+      char bytes[sizeof(uintptr_t)];
+    } data;
+    for (size_t i = 0; i < sizeof(uintptr_t); ++i)
+      data.bytes[i] = static_cast<const char *>(addr)[i];
+    return {data.value};
+  }
+
+  // Find out the lanes equal to the given byte and return t...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/73469


More information about the libc-commits mailing list