[libc-commits] [libc] [libc] [search] implement hcreate(_r)/hsearch(_r)/hdestroy(_r) (PR #73469)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Tue Nov 28 11:27:19 PST 2023
https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/73469
>From 42ababe17c47e998d4281d468484da400963ccd0 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 24 Nov 2023 15:17:37 -0500
Subject: [PATCH 1/8] [libc] [search] implement
hcreate(_r)/hsearch(_r)/hdestroy(_r)
https://man7.org/linux/man-pages/man3/hsearch.3.html
---
libc/config/linux/aarch64/entrypoints.txt | 8 +
libc/config/linux/aarch64/headers.txt | 1 +
libc/config/linux/api.td | 4 +
libc/config/linux/arm/entrypoints.txt | 8 +
libc/config/linux/arm/headers.txt | 1 +
libc/config/linux/riscv/entrypoints.txt | 8 +
libc/config/linux/riscv/headers.txt | 1 +
libc/config/linux/x86_64/entrypoints.txt | 8 +
libc/config/linux/x86_64/headers.txt | 1 +
libc/include/CMakeLists.txt | 12 +
libc/include/llvm-libc-types/ACTION.h | 14 ++
libc/include/llvm-libc-types/CMakeLists.txt | 3 +
libc/include/llvm-libc-types/ENTRY.h | 17 ++
.../llvm-libc-types/struct_hsearch_data.h | 17 ++
libc/include/search.h.def | 18 ++
libc/spec/gnu_ext.td | 40 ++-
libc/spec/posix.td | 35 ++-
libc/spec/spec.td | 5 +
libc/src/CMakeLists.txt | 1 +
libc/src/__support/CMakeLists.txt | 22 ++
libc/src/__support/HashTable/CMakeLists.txt | 48 ++++
libc/src/__support/HashTable/bitmask.h | 91 +++++++
.../HashTable/generic/bitmask_impl.inc | 102 ++++++++
libc/src/__support/HashTable/randomness.h | 59 +++++
.../__support/HashTable/sse2/bitmask_impl.inc | 40 +++
libc/src/__support/HashTable/table.h | 229 ++++++++++++++++++
libc/src/__support/bit.h | 29 +++
libc/src/__support/hash.h | 161 ++++++++++++
libc/src/__support/memory_size.h | 72 ++++++
libc/src/search/CMakeLists.txt | 77 ++++++
libc/src/search/hcreate.cpp | 28 +++
libc/src/search/hcreate.h | 18 ++
libc/src/search/hcreate_r.cpp | 32 +++
libc/src/search/hcreate_r.h | 18 ++
libc/src/search/hdestroy.cpp | 20 ++
libc/src/search/hdestroy.h | 18 ++
libc/src/search/hdestroy_r.cpp | 25 ++
libc/src/search/hdestroy_r.h | 18 ++
libc/src/search/hsearch.cpp | 35 +++
libc/src/search/hsearch.h | 18 ++
libc/src/search/hsearch/CMakeLists.txt | 7 +
libc/src/search/hsearch/global.cpp | 13 +
libc/src/search/hsearch/global.h | 13 +
libc/src/search/hsearch_r.cpp | 42 ++++
libc/src/search/hsearch_r.h | 19 ++
libc/test/src/CMakeLists.txt | 1 +
libc/test/src/__support/CMakeLists.txt | 24 ++
.../src/__support/HashTable/CMakeLists.txt | 67 +++++
.../src/__support/HashTable/bitmask_test.cpp | 69 ++++++
.../src/__support/HashTable/group_test.cpp | 91 +++++++
.../src/__support/HashTable/table_test.cpp | 77 ++++++
libc/test/src/__support/bit_test.cpp | 17 ++
libc/test/src/__support/hash_test.cpp | 135 +++++++++++
libc/test/src/__support/memory_size_test.cpp | 86 +++++++
libc/test/src/search/CMakeLists.txt | 16 ++
libc/test/src/search/hsearch_test.cpp | 124 ++++++++++
libc/utils/smhasher/smhasher.patch | 205 ++++++++++++++++
libc/utils/smhasher/smhasher.txt | 1 +
58 files changed, 2367 insertions(+), 2 deletions(-)
create mode 100644 libc/include/llvm-libc-types/ACTION.h
create mode 100644 libc/include/llvm-libc-types/ENTRY.h
create mode 100644 libc/include/llvm-libc-types/struct_hsearch_data.h
create mode 100644 libc/include/search.h.def
create mode 100644 libc/src/__support/HashTable/CMakeLists.txt
create mode 100644 libc/src/__support/HashTable/bitmask.h
create mode 100644 libc/src/__support/HashTable/generic/bitmask_impl.inc
create mode 100644 libc/src/__support/HashTable/randomness.h
create mode 100644 libc/src/__support/HashTable/sse2/bitmask_impl.inc
create mode 100644 libc/src/__support/HashTable/table.h
create mode 100644 libc/src/__support/hash.h
create mode 100644 libc/src/__support/memory_size.h
create mode 100644 libc/src/search/CMakeLists.txt
create mode 100644 libc/src/search/hcreate.cpp
create mode 100644 libc/src/search/hcreate.h
create mode 100644 libc/src/search/hcreate_r.cpp
create mode 100644 libc/src/search/hcreate_r.h
create mode 100644 libc/src/search/hdestroy.cpp
create mode 100644 libc/src/search/hdestroy.h
create mode 100644 libc/src/search/hdestroy_r.cpp
create mode 100644 libc/src/search/hdestroy_r.h
create mode 100644 libc/src/search/hsearch.cpp
create mode 100644 libc/src/search/hsearch.h
create mode 100644 libc/src/search/hsearch/CMakeLists.txt
create mode 100644 libc/src/search/hsearch/global.cpp
create mode 100644 libc/src/search/hsearch/global.h
create mode 100644 libc/src/search/hsearch_r.cpp
create mode 100644 libc/src/search/hsearch_r.h
create mode 100644 libc/test/src/__support/HashTable/CMakeLists.txt
create mode 100644 libc/test/src/__support/HashTable/bitmask_test.cpp
create mode 100644 libc/test/src/__support/HashTable/group_test.cpp
create mode 100644 libc/test/src/__support/HashTable/table_test.cpp
create mode 100644 libc/test/src/__support/hash_test.cpp
create mode 100644 libc/test/src/__support/memory_size_test.cpp
create mode 100644 libc/test/src/search/CMakeLists.txt
create mode 100644 libc/test/src/search/hsearch_test.cpp
create mode 100644 libc/utils/smhasher/smhasher.patch
create mode 100644 libc/utils/smhasher/smhasher.txt
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 284feb7b99096ec..ecefa5884adb3eb 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -130,6 +130,14 @@ set(TARGET_LIBC_ENTRYPOINTS
#libc.src.stdio.scanf
#libc.src.stdio.fscanf
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.madvise
libc.src.sys.mman.mmap
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index c47e05c924fd94f..cfca5959b5ffa57 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -12,6 +12,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.sys_mman
libc.include.sys_socket
libc.include.sys_syscall
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 377763b97cfd958..726e58f376eaa76 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -248,3 +248,7 @@ def TermiosAPI : PublicAPI<"termios.h"> {
def SetJmpAPI : PublicAPI<"setjmp.h"> {
let Types = ["jmp_buf"];
}
+
+def SearchAPI : PublicAPI<"search.h"> {
+ let Types = ["ACTION", "ENTRY", "struct hsearch_data"];
+}
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 27c0b8e5b3a3aa2..ee701c04b2e2a8a 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -89,6 +89,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.strtoul
libc.src.stdlib.strtoull
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.mmap
libc.src.sys.mman.munmap
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index fe7c88e922e07e8..bd08d8f8fa437fb 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -7,4 +7,5 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
)
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index a5f0c91e32d0810..1ccb40108bd8507 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -136,6 +136,14 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdio.scanf
libc.src.stdio.fscanf
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# sys/mman.h entrypoints
libc.src.sys.mman.madvise
libc.src.sys.mman.mmap
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.termios
libc.include.threads
libc.include.time
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 63aa7473115a08e..43266e0e5b66e61 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -497,6 +497,14 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.spawn.posix_spawn_file_actions_destroy
libc.src.spawn.posix_spawn_file_actions_init
+ # search.h entrypoints
+ libc.src.search.hcreate
+ libc.src.search.hcreate_r
+ libc.src.search.hsearch
+ libc.src.search.hsearch_r
+ libc.src.search.hdestroy
+ libc.src.search.hdestroy_r
+
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 24247ee5819f94a..3e2b1630f1695eb 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -17,6 +17,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.stdlib
libc.include.string
libc.include.strings
+ libc.include.search
libc.include.termios
libc.include.threads
libc.include.time
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 9d170603ffa45cd..429c0f1f12866a8 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -133,6 +133,18 @@ add_gen_header(
.llvm-libc-types.size_t
)
+add_gen_header(
+ search
+ DEF_FILE search.h.def
+ GEN_HDR search.h
+ DEPENDS
+ .llvm_libc_common_h
+ .llvm-libc-types.ACTION
+ .llvm-libc-types.ENTRY
+ .llvm-libc-types.struct_hsearch_data
+ .llvm-libc-types.size_t
+)
+
add_gen_header(
time
DEF_FILE time.h.def
diff --git a/libc/include/llvm-libc-types/ACTION.h b/libc/include/llvm-libc-types/ACTION.h
new file mode 100644
index 000000000000000..7181a59b177d6b6
--- /dev/null
+++ b/libc/include/llvm-libc-types/ACTION.h
@@ -0,0 +1,14 @@
+//===-- Definition of ACTION type -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ACTION_H__
+#define __LLVM_LIBC_TYPES_ACTION_H__
+
+typedef enum { FIND, ENTER } ACTION;
+
+#endif // __LLVM_LIBC_TYPES_ACTION_H__
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 3c0cc7bbc71dacb..225ad780c4d01f2 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -91,3 +91,6 @@ add_header(wint_t HDR wint_t.h)
add_header(sa_family_t HDR sa_family_t.h)
add_header(struct_sockaddr HDR struct_sockaddr.h)
add_header(rpc_opcodes_t HDR rpc_opcodes_t.h)
+add_header(ACTION HDR ACTION.h)
+add_header(ENTRY HDR ENTRY.h)
+add_header(struct_hsearch_data HDR struct_hsearch_data.h)
diff --git a/libc/include/llvm-libc-types/ENTRY.h b/libc/include/llvm-libc-types/ENTRY.h
new file mode 100644
index 000000000000000..0ccb5938207acc8
--- /dev/null
+++ b/libc/include/llvm-libc-types/ENTRY.h
@@ -0,0 +1,17 @@
+//===-- Definition of ENTRY type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ENTRY_H__
+#define __LLVM_LIBC_TYPES_ENTRY_H__
+
+typedef struct {
+ char *key;
+ void *data;
+} ENTRY;
+
+#endif // __LLVM_LIBC_TYPES_ENTRY_H__
diff --git a/libc/include/llvm-libc-types/struct_hsearch_data.h b/libc/include/llvm-libc-types/struct_hsearch_data.h
new file mode 100644
index 000000000000000..7e2a7232fce5358
--- /dev/null
+++ b/libc/include/llvm-libc-types/struct_hsearch_data.h
@@ -0,0 +1,17 @@
+//===-- Definition of type struct hsearch_data ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+#define __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
+
+struct hsearch_data {
+ void *__opaque;
+ unsigned int __unused[2];
+};
+
+#endif // __LLVM_LIBC_TYPES_STRUCT_HSEARCH_DATA_H__
diff --git a/libc/include/search.h.def b/libc/include/search.h.def
new file mode 100644
index 000000000000000..3435c1f8ad048ea
--- /dev/null
+++ b/libc/include/search.h.def
@@ -0,0 +1,18 @@
+//===-- POSIX header search.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SEARCH_H
+#define LLVM_LIBC_SEARCH_H
+
+#include <__llvm-libc-common.h>
+#define __need_size_t
+#include <stddef.h>
+
+%%public_api()
+
+#endif // LLVM_LIBC_SEARCH_H
diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td
index dfb12419d14005b..cb0407c84d4e212 100644
--- a/libc/spec/gnu_ext.td
+++ b/libc/spec/gnu_ext.td
@@ -3,6 +3,8 @@ def CpuSetPtr : PtrType<CpuSetT>;
def ConstCpuSetPtr : ConstType<CpuSetPtr>;
def QSortRCompareT : NamedType<"__qsortrcompare_t">;
+def StructHsearchData : NamedType<"struct hsearch_data">;
+def StructHsearchDataPtr : PtrType<StructHsearchData>;
def GnuExtensions : StandardSpec<"GNUExtensions"> {
NamedType CookieIOFunctionsT = NamedType<"cookie_io_functions_t">;
@@ -54,7 +56,6 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
>,
]
>;
-
HeaderSpec String = HeaderSpec<
"string.h",
[], // Macros
@@ -89,6 +90,42 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
]
>;
+ HeaderSpec Search = HeaderSpec<
+ "search.h",
+ [], // Macros
+ [
+ StructHsearchData
+ ],
+ [], // Enumerations
+ [
+ FunctionSpec<
+ "hcreate_r",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<SizeTType>,
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ FunctionSpec<
+ "hdestroy_r",
+ RetValSpec<VoidType>,
+ [
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ FunctionSpec<
+ "hsearch_r",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<EntryType>,
+ ArgSpec<ActionType>,
+ ArgSpec<EntryTypePtrPtr>,
+ ArgSpec<StructHsearchDataPtr>
+ ]
+ >,
+ ]
+ >;
+
HeaderSpec FEnv = HeaderSpec<
"fenv.h",
[], // Macros
@@ -243,6 +280,7 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> {
StdIO,
StdLib,
String,
+ Search,
UniStd,
];
}
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index a367cf2a6935c02..c7acf6d25a2d873 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1269,6 +1269,38 @@ def POSIX : StandardSpec<"POSIX"> {
]
>;
+ HeaderSpec Search = HeaderSpec<
+ "search.h",
+ [], // Macros
+ [
+ ActionType,
+ EntryType
+ ], // Types
+ [], // Enumerations
+ [
+ FunctionSpec<
+ "hcreate",
+ RetValSpec<IntType>,
+ [
+ ArgSpec<SizeTType>
+ ]
+ >,
+ FunctionSpec<
+ "hdestroy",
+ RetValSpec<VoidType>,
+ [] // Args
+ >,
+ FunctionSpec<
+ "hsearch",
+ RetValSpec<EntryTypePtr>,
+ [
+ ArgSpec<EntryType>,
+ ArgSpec<ActionType>
+ ]
+ >,
+ ]
+ >;
+
HeaderSpec Termios = HeaderSpec<
"termios.h",
[
@@ -1414,6 +1446,7 @@ def POSIX : StandardSpec<"POSIX"> {
Time,
Termios,
UniStd,
- String
+ String,
+ Search,
];
}
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index b0d5511a4f087ee..9b689b5eb502a9f 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -140,6 +140,11 @@ def SuSecondsT : NamedType<"suseconds_t">;
//added because __assert_fail needs it.
def UnsignedType : NamedType<"unsigned">;
+def ActionType : NamedType<"ACTION">;
+def EntryType : NamedType<"ENTRY">;
+def EntryTypePtr : PtrType<EntryType>;
+def EntryTypePtrPtr : PtrType<EntryTypePtr>;
+
class Macro<string name> {
string Name = name;
}
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 88838eecc53c9a1..3ab62a4f667d260 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -35,3 +35,4 @@ add_subdirectory(signal)
add_subdirectory(spawn)
add_subdirectory(threads)
add_subdirectory(time)
+add_subdirectory(search)
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index b939fae3be791da..cdd9d9dfe55a4d4 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -230,6 +230,26 @@ add_header_library(
libc.src.__support.OSUtil.osutil
)
+add_header_library(
+ hash
+ HDRS
+ hash.h
+ DEPENDS
+ .common
+ .uint128
+)
+
+add_header_library(
+ memory_size
+ HDRS
+ memory_size.h
+ DEPENDS
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.CPP.limits
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.attributes
+)
+
add_subdirectory(FPUtil)
add_subdirectory(OSUtil)
add_subdirectory(StringUtil)
@@ -241,3 +261,5 @@ add_subdirectory(RPC)
add_subdirectory(threads)
add_subdirectory(File)
+
+add_subdirectory(HashTable)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
new file mode 100644
index 000000000000000..ae2fb640141c18c
--- /dev/null
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -0,0 +1,48 @@
+add_header_library(
+ bitmask
+ HDRS
+ bitmask.h
+ DEPENDS
+ libc.src.__support.common
+ libc.src.__support.bit
+)
+
+list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
+if (NOT ${getrandom_index} EQUAL -1)
+ message(STATUS "Using getrandom for hashtable randomness")
+ set(randomness_compile_flags -DLIBC_HASHTABLE_USE_GETRANDOM)
+ set(randomness_extra_depends
+ libc.src.sys.random.getrandom libc.src.errno.errno)
+endif()
+
+
+add_header_library(
+ table
+ HDRS
+ table.h
+ DEPENDS
+ .bitmask
+ libc.src.__support.memory_size
+ libc.src.__support.bit
+ libc.src.__support.CPP.type_traits
+ libc.src.__support.macros.attributes
+ libc.src.__support.macros.optimization
+ libc.src.__support.hash
+ libc.src.string.memset
+ libc.src.string.strcmp
+ libc.src.string.strlen
+ libc.include.stdlib
+ libc.include.llvm-libc-types.ENTRY
+)
+
+add_header_library(
+ randomness
+ HDRS
+ randomness.h
+ DEPENDS
+ libc.src.__support.hash
+ libc.src.__support.common
+ ${randomness_extra_depends}
+ FLAGS
+ ${randomness_compile_flags}
+)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
new file mode 100644
index 000000000000000..4f14c32e1ac25e7
--- /dev/null
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -0,0 +1,91 @@
+//===-- HashTable BitMasks --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
+
+#include "src/__support/bit.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t, uint64_t
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+// Implementations of the bitmask.
+// The backend word type may vary depending on different microarchitectures.
+// For example, with X86 SSE2, the bitmask is just the 16bit unsigned integer
+// corresponding to lanes in a SIMD register.
+//
+// Notice that this implementation is simplified from traditional swisstable:
+// since we do not support deletion, we only need to care about if the highest
+// bit is set or not:
+// =============================
+// | Slot Status | Bitmask |
+// =============================
+// | Available | 0b1xxx'xxxx |
+// | Occupied | 0b0xxx'xxxx |
+// =============================
+template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
+ // A masked constant whose bits are all set.
+ constexpr static inline T MASK = WORD_MASK;
+ // A stride in the bitmask may use multiple bits.
+ constexpr static inline size_t STRIDE = WORD_STRIDE;
+
+ T word;
+
+ // Check if any bit is set inside the word.
+ bool any_bit_set() const { return word != 0; }
+
+ // Count trailing zeros with respect to stride. (Assume the bitmask is
+ // nonzero.)
+ size_t lowest_set_bit_nonzero() const {
+ return unsafe_ctz<T>(word) / WORD_STRIDE;
+ }
+};
+
+// Not all bitmasks are iterable --- only those with just the MSB set in each
+// lane. Hence, we make the types nominally different to distinguish them.
+template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
+ // Use the bitmask as an iterator: update the state and return the current
+ // lowest set bit. To make the bitmask iterable, each stride must contain 0
+ // or exactly 1 set bit.
+ void remove_lowest_bit() {
+ // Remove the last set bit inside the word:
+ // word = 011110100 (original value)
+ // word - 1 = 011110011 (invert all bits up to the last set bit)
+ // word & (word - 1) = 011110000 (value with the last bit cleared)
+ this->word = this->word & (this->word - 1);
+ }
+ using value_type = size_t;
+ using iterator = BitMask;
+ using const_iterator = BitMask;
+ size_t operator*() const { return this->lowest_set_bit_nonzero(); }
+ IteratableBitMaskAdaptor &operator++() {
+ this->remove_lowest_bit();
+ return *this;
+ }
+ IteratableBitMaskAdaptor begin() { return *this; }
+ IteratableBitMaskAdaptor end() { return {0}; }
+ bool operator==(const IteratableBitMaskAdaptor &other) {
+ return this->word == other.word;
+ }
+ bool operator!=(const IteratableBitMaskAdaptor &other) {
+ return this->word != other.word;
+ }
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+
+#if defined(__SSE2__)
+#include "sse2/bitmask_impl.inc"
+#else
+#include "generic/bitmask_impl.inc"
+#endif
+
+#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc
new file mode 100644
index 000000000000000..24268d963f84b84
--- /dev/null
+++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc
@@ -0,0 +1,102 @@
+//===-- HashTable BitMasks Generic Implementation ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/endian.h"
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+// Helper function to spread a byte across the whole word.
+// Cumulatively, the procedure looks like:
+// byte = 0x00000000000000ff
+// byte | (byte << 8) = 0x000000000000ffff
+// byte | (byte << 16) = 0x00000000ffffffff
+// byte | (byte << 32) = 0xffffffffffffffff
+constexpr static inline uintptr_t repeat_byte(uintptr_t byte) {
+ size_t shift_amount = 8;
+ while (shift_amount < sizeof(uintptr_t) * 8) {
+ byte |= byte << shift_amount;
+ shift_amount <<= 1;
+ }
+ return byte;
+}
+
+using BitMask = BitMaskAdaptor<uintptr_t, repeat_byte(0x80), 0x8ull>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+ uintptr_t data;
+
+ // Load a group of control words from an arbitrary address.
+ static Group load(const void *__restrict addr) {
+ union {
+ uintptr_t value;
+ char bytes[sizeof(uintptr_t)];
+ } data;
+ for (size_t i = 0; i < sizeof(uintptr_t); ++i)
+ data.bytes[i] = static_cast<const char *>(addr)[i];
+ return {data.value};
+ }
+
+ // Find out the lanes equal to the given byte and return the bitmask
+ // with corresponding bits set.
+ IteratableBitMask match_byte(uint8_t byte) const {
+ // Given byte = 0x10, suppose the data is:
+ //
+ // data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
+ //
+ // First, we compare the byte using XOR operation:
+ //
+ // [ 0x10 | 0x10 | 0x10 | 0x10 | ... ] (0)
+ // ^ [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ] (1)
+ // = [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
+ //
+ // Notice that the equal positions will now be 0x00, so if we subtract 0x01
+ // from every byte, those positions will need to borrow from the upper
+ // bits (assume no borrow from the hidden parts):
+ // [ 0x00 | 0x00 | 0x10 | 0xE1 | ... ] (2)
+ // - [ 0x01 | 0x01 | 0x01 | 0x01 | ... ] (3)
+ // = [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
+ //
+ // But there may be some bytes whose highest bit is already set after the
+ // xor operation. To rule out these positions, we AND them with the NOT
+ // of the XOR result:
+ //
+ // [ 0xFF | 0xFF | 0xEF | 0x1E | ... ] (5, NOT (2))
+ // & [ 0xFE | 0xFF | 0x0F | 0xE0 | ... ] (4)
+ // = [ 0xFE | 0xFF | 0x0F | 0x00 | ... ] (6)
+ //
+ // To make the bitmask iteratable, only one bit can be set in each stride.
+ // So we AND each byte with 0x80 and keep only the highest bit:
+ //
+ // [ 0xFE | 0xFF | 0x0F | 0x00 | ... ] (6)
+ // & [ 0x80 | 0x80 | 0x80 | 0x80 | ... ] (7)
+ // = [ 0x80 | 0x80 | 0x00 | 0x00 | ... ] (8)
+ //
+ // However, false positives are possible, for example if the data is
+ // [ 0x10 | 0x11 | 0x10 | 0xF1 | ... ]. This only happens when there is a
+ // key that differs from the searched one only in the lowest bit. The
+ // claims are:
+ //
+ // - This never happens for `EMPTY` and `DELETED`, only full entries.
+ // - The check for key equality will catch these.
+ // - This only happens if there is at least 1 true match.
+ // - The chance of this happening is very low (< 1% chance per byte).
+ auto cmp = data ^ repeat_byte(byte);
+ auto result = LIBC_NAMESPACE::Endian::to_little_endian(
+ (cmp - repeat_byte(0x01)) & ~cmp & repeat_byte(0x80));
+ return {result};
+ }
+
+ // Find out the lanes equal to EMPTY or DELETE (highest bit set) and
+ // return the bitmask with corresponding bits set.
+ BitMask mask_available() const {
+ return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)};
+ }
+};
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h
new file mode 100644
index 000000000000000..d0336f0957572ec
--- /dev/null
+++ b/libc/src/__support/HashTable/randomness.h
@@ -0,0 +1,59 @@
+//===-- HashTable Randomness ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H
+#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H
+
+#include "src/__support/common.h"
+#include "src/__support/hash.h"
+#include "src/__support/macros/attributes.h"
+#if defined(LIBC_HASHTABLE_USE_GETRANDOM)
+#include "src/errno/libc_errno.h"
+#include "src/sys/random/getrandom.h"
+#endif
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+namespace randomness {
+LIBC_INLINE_VAR thread_local static HashState state = {
+ 0x38049a7ea6f5a79b, 0x45cb02147c3f718a, 0x53eb431c12770718,
+ 0x5b55742bd20a2fcb};
+LIBC_INLINE_VAR thread_local static uint64_t counter = 0;
+LIBC_INLINE_VAR constexpr static uint64_t RESEED_PERIOD = 1024;
+static LIBC_INLINE uint64_t next_random_seed() {
+ if (counter % RESEED_PERIOD == 0) {
+ uint64_t entropy[2];
+ entropy[0] = reinterpret_cast<uint64_t>(&entropy);
+ entropy[1] = reinterpret_cast<uint64_t>(&state);
+#if defined(LIBC_HASHTABLE_USE_GETRANDOM)
+ int errno_backup = libc_errno;
+ ssize_t count = sizeof(entropy);
+ uint8_t *buffer = reinterpret_cast<uint8_t *>(entropy);
+ while (count > 0) {
+ ssize_t len = getrandom(buffer, count, 0);
+ if (len == -1) {
+ if (libc_errno == ENOSYS)
+ break;
+ continue;
+ }
+ count -= len;
+ buffer += len;
+ }
+ libc_errno = errno_backup;
+#endif
+ state.update(&entropy, sizeof(entropy));
+ }
+ state.update(&counter, sizeof(counter));
+ counter++;
+ return state.finish();
+}
+
+} // namespace randomness
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_RANDOMNESS_H
diff --git a/libc/src/__support/HashTable/sse2/bitmask_impl.inc b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
new file mode 100644
index 000000000000000..00d5640303dc6cb
--- /dev/null
+++ b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
@@ -0,0 +1,40 @@
+//===-- HashTable BitMasks SSE2 Implementation ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <emmintrin.h>
+namespace LIBC_NAMESPACE {
+namespace internal {
+// With SSE2, every bitmask is iteratable as
+// we use a single bit to encode the data.
+
+using BitMask = BitMaskAdaptor<uint16_t, 0xffffu, 0x1u>;
+using IteratableBitMask = IteratableBitMaskAdaptor<BitMask>;
+
+struct Group {
+ __m128i data;
+
+ // Load a group of control words from an arbitrary address.
+ static Group load(const void *__restrict addr) {
+ return {_mm_loadu_si128(static_cast<const __m128i *>(addr))};
+ }
+
+ // Find out the lanes equal to the given byte and return the bitmask
+ // with corresponding bits set.
+ IteratableBitMask match_byte(uint8_t byte) const {
+ auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(byte));
+ auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));
+ return {bitmask};
+ }
+
+ BitMask mask_available() const {
+ auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(data));
+ return {bitmask};
+ }
+};
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h
new file mode 100644
index 000000000000000..4ec2ca81ee1aaed
--- /dev/null
+++ b/libc/src/__support/HashTable/table.h
@@ -0,0 +1,229 @@
+//===-- Fix-sized Monotonic HashTable ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H
+#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H
+
+#include "include/llvm-libc-types/ENTRY.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/HashTable/bitmask.h"
+#include "src/__support/bit.h"
+#include "src/__support/hash.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/memory_size.h"
+#include "src/string/memset.h"
+#include "src/string/strcmp.h"
+#include "src/string/strlen.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+static LIBC_INLINE uint8_t secondary_hash(uint64_t hash) {
+ // top 7 bits of the hash.
+ return static_cast<uint8_t>((hash >> 57) & 0x7f);
+}
+
+// Probe sequence based on triangular numbers, which is guaranteed (since our
+// table size is a power of two) to visit every group of elements exactly once.
+//
+// A triangular probe has us jump by 1 more group every time. So first we
+// jump by 1 group (meaning we just continue our linear scan), then 2 groups
+// (skipping over 1 group), then 3 groups (skipping over 2 groups), and so on.
+//
+// If we set sizeof(Group) to be one unit:
+// T[k] = sum {1 + 2 + ... + k} = k * (k + 1) / 2
+// It is provable that T[k] mod 2^m generates a permutation of
+// 0, 1, 2, 3, ..., 2^m - 2, 2^m - 1
+// Detailed proof is available at:
+// https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/
+struct ProbeSequence {
+ size_t position;
+ size_t stride;
+ size_t entries_mask;
+
+ size_t next() {
+ position += stride;
+ position &= entries_mask;
+ stride += sizeof(Group);
+ return position;
+ }
+};
+
+// The number of entries is at least the group width: we do not
+// need to do the fixup when we set the control bytes.
+// The number of entries is at least 8: we don't have to worry
+// about special sizes when checking the fullness of the table.
+static LIBC_INLINE size_t capacity_to_entries(size_t cap) {
+ if (8 >= sizeof(Group) && cap < 8)
+ return 8;
+ if (16 >= sizeof(Group) && cap < 15)
+ return 16;
+ if (cap < sizeof(Group))
+ cap = sizeof(Group);
+ // overflow is always checked in allocate()
+ return next_power_of_two(cap * 8 / 7);
+}
+
+// The heap memory layout for N buckets HashTable is as follows:
+//
+// =======================
+// | N * Entry |
+// ======================= <- align boundary
+// | Header |
+// =======================
+// | (N + sizeof(Group)) * Byte |
+// =======================
+//
+// The trailing group part is to make sure we can always load
+// a whole group of control bytes.
+
+struct HashTable {
+ HashState state;
+ size_t entries_mask; // number of buckets - 1
+ size_t available_slots; // less than capacity
+private:
+ // How many entries are there in the table.
+ size_t num_of_entries() const { return entries_mask + 1; }
+
+ bool is_full() const { return available_slots == 0; }
+
+ size_t offset_from_entries() const {
+ size_t entries_size = num_of_entries() * sizeof(ENTRY);
+ return entries_size + offset_to(entries_size, alignof(HashTable));
+ }
+
+ constexpr static size_t table_alignment() {
+ return alignof(HashTable) > alignof(ENTRY) ? alignof(HashTable)
+ : alignof(ENTRY);
+ }
+
+ constexpr static size_t offset_to_groups() { return sizeof(HashTable); }
+
+ ENTRY &entry(size_t i) { return reinterpret_cast<ENTRY *>(this)[-i - 1]; }
+
+ uint8_t &control(size_t i) {
+ uint8_t *ptr = reinterpret_cast<uint8_t *>(this) + offset_to_groups();
+ return ptr[i];
+ }
+
+ // We duplicate a group of control bytes to the end. Thus, it is possible that
+ // we need to set two control bytes at the same time.
+ void set_ctrl(size_t index, uint8_t value) {
+ size_t index2 = ((index - sizeof(Group)) & entries_mask) + sizeof(Group);
+ control(index) = value;
+ control(index2) = value;
+ }
+
+public:
+ static void deallocate(HashTable *table) {
+ if (table) {
+ void *ptr =
+ reinterpret_cast<uint8_t *>(table) - table->offset_from_entries();
+ free(ptr);
+ }
+ }
+ static HashTable *allocate(size_t capacity, uint64_t randomness) {
+ // check if capacity_to_entries overflows MAX_MEM_SIZE
+ if (capacity > size_t{1} << (8 * sizeof(size_t) - 1 - 3))
+ return nullptr;
+ SafeMemSize entries{capacity_to_entries(capacity)};
+ SafeMemSize entries_size = entries * SafeMemSize{sizeof(ENTRY)};
+ SafeMemSize align_boundary = entries_size.align_up(table_alignment());
+ SafeMemSize ctrl_sizes = entries + SafeMemSize{sizeof(Group)};
+ SafeMemSize header_size{offset_to_groups()};
+ SafeMemSize total_size =
+ (align_boundary + header_size + ctrl_sizes).align_up(table_alignment());
+ if (!total_size.valid())
+ return nullptr;
+ void *mem =
+ aligned_alloc(table_alignment(), static_cast<size_t>(total_size));
+ // Bail out before offsetting: nullptr + align_boundary is not a null table.
+ if (mem == nullptr)
+ return nullptr;
+ HashTable *table = reinterpret_cast<HashTable *>(
+ static_cast<uint8_t *>(mem) + align_boundary);
+ table->entries_mask = entries - 1u;
+ table->available_slots = entries / 8 * 7;
+ table->state = HashState{randomness};
+ memset(&table->control(0), 0x80, ctrl_sizes);
+ memset(mem, 0, table->offset_from_entries());
+ return table;
+ }
+
+private:
+ size_t find(const char *key, uint64_t primary) {
+ uint8_t secondary = secondary_hash(primary);
+ ProbeSequence sequence{static_cast<size_t>(primary), 0, entries_mask};
+ while (true) {
+ size_t pos = sequence.next();
+ Group ctrls = Group::load(&control(pos));
+ IteratableBitMask masks = ctrls.match_byte(secondary);
+ for (size_t i : masks) {
+ size_t index = (pos + i) & entries_mask;
+ ENTRY &entry = this->entry(index);
+ if (LIBC_LIKELY(entry.key != nullptr && strcmp(entry.key, key) == 0))
+ return index;
+ }
+ BitMask available = ctrls.mask_available();
+ // Since there is no deletion, the first time we find an available slot
+ // it is also ready to be used as an insertion point. Therefore, we also
+ // return the first available slot we find. If such entry is empty, the
+ // key will be nullptr.
+ if (LIBC_LIKELY(available.any_bit_set())) {
+ size_t index =
+ (pos + available.lowest_set_bit_nonzero()) & entries_mask;
+ return index;
+ }
+ }
+ }
+
+private:
+ ENTRY *insert(ENTRY item, uint64_t primary) {
+ auto index = find(item.key, primary);
+ auto slot = &this->entry(index);
+ // SVr4 and POSIX.1-2001 specify that action is significant only for
+ // unsuccessful searches, so that an ENTER should not do anything
+ // for a successful search.
+ if (slot->key != nullptr) {
+ return slot;
+ }
+ if (!is_full()) {
+ set_ctrl(index, secondary_hash(primary));
+ slot->key = item.key;
+ slot->data = item.data;
+ available_slots--;
+ return slot;
+ }
+ return nullptr;
+ }
+
+public:
+ ENTRY *find(const char *key) {
+ LIBC_NAMESPACE::internal::HashState hasher = state;
+ hasher.update(key, strlen(key));
+ uint64_t primary = hasher.finish();
+ ENTRY &entry = this->entry(find(key, primary));
+ if (entry.key == nullptr)
+ return nullptr;
+ return &entry;
+ }
+ ENTRY *insert(ENTRY item) {
+ LIBC_NAMESPACE::internal::HashState hasher = state;
+ hasher.update(item.key, strlen(item.key));
+ uint64_t primary = hasher.finish();
+ return insert(item, primary);
+ }
+};
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H
diff --git a/libc/src/__support/bit.h b/libc/src/__support/bit.h
index d0a15c89b7b45e7..5b97d9c38e0cd6e 100644
--- a/libc/src/__support/bit.h
+++ b/libc/src/__support/bit.h
@@ -28,6 +28,14 @@ template <typename T> LIBC_INLINE int constexpr correct_zero(T val, int bits) {
}
template <typename T> LIBC_INLINE constexpr int clz(T val);
+template <> LIBC_INLINE int clz<unsigned char>(unsigned char val) {
+ return __builtin_clz(static_cast<unsigned int>(val)) -
+ 8 * (sizeof(unsigned int) - sizeof(unsigned char));
+}
+template <> LIBC_INLINE int clz<unsigned short>(unsigned short val) {
+ return __builtin_clz(static_cast<unsigned int>(val)) -
+ 8 * (sizeof(unsigned int) - sizeof(unsigned short));
+}
template <> LIBC_INLINE int clz<unsigned int>(unsigned int val) {
return __builtin_clz(val);
}
@@ -42,6 +50,12 @@ clz<unsigned long long int>(unsigned long long int val) {
}
template <typename T> LIBC_INLINE constexpr int ctz(T val);
+template <> LIBC_INLINE int ctz<unsigned char>(unsigned char val) {
+ return __builtin_ctz(static_cast<unsigned int>(val));
+}
+template <> LIBC_INLINE int ctz<unsigned short>(unsigned short val) {
+ return __builtin_ctz(static_cast<unsigned int>(val));
+}
template <> LIBC_INLINE int ctz<unsigned int>(unsigned int val) {
return __builtin_ctz(val);
}
@@ -72,6 +86,21 @@ template <typename T> LIBC_INLINE constexpr int unsafe_clz(T val) {
return __internal::clz(val);
}
+template <typename T> LIBC_INLINE constexpr T next_power_of_two(T val) {
+ if (val == 0)
+ return 1;
+ T idx = safe_clz(val - 1);
+ return static_cast<T>(1) << ((8ull * sizeof(T)) - idx);
+}
+
+template <typename T> LIBC_INLINE constexpr bool is_power_of_two(T val) {
+ return val != 0 && (val & (val - 1)) == 0;
+}
+
+template <typename T> LIBC_INLINE constexpr T offset_to(T val, T align) {
+ return (-val) & (align - 1);
+}
+
} // namespace LIBC_NAMESPACE
#endif // LLVM_LIBC_SRC___SUPPORT_BIT_H
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h
new file mode 100644
index 000000000000000..982e437116f348c
--- /dev/null
+++ b/libc/src/__support/hash.h
@@ -0,0 +1,161 @@
+//===-- Portable string hash function ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
+#define LLVM_LIBC_SRC___SUPPORT_HASH_H
+
+#include "src/__support/UInt128.h" // UInt128
+#include "src/__support/macros/attributes.h"
+#include <stdint.h> // For uint64_t
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+// Folded multiplication.
+// This function multiplies two 64-bit integers and xor the high and
+// low 64-bit parts of the result.
+LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
+ UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
+ UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
+ uint64_t low = static_cast<uint64_t>(p & mask);
+ uint64_t high = static_cast<uint64_t>(p >> 64);
+ return low ^ high;
+}
+
+// Read as little endian.
+// Shift-and-or implementation does not give a satisfactory code on aarch64.
+// Therefore, we use a union to read the value.
+template <typename T> LIBC_INLINE static T read_little_endian(const void *ptr) {
+ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
+ union {
+ T value;
+ uint8_t buffer[sizeof(T)];
+ } data;
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+ // Compiler should able to optimize this as a load followed by a byte swap.
+ for (size_t i = 0; i < sizeof(T); ++i) {
+ data.buffer[i] = bytes[sizeof(T) - i - 1];
+ }
+#else
+ for (size_t i = 0; i < sizeof(T); ++i) {
+ data.buffer[i] = bytes[i];
+ }
+#endif
+ return data.value;
+}
+
+// Specialized read functions for small values. size must be <= 8.
+LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
+ uint64_t &low, uint64_t &high) {
+ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
+ if (size >= 2) {
+ if (size >= 4) {
+ low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
+ high =
+ static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
+ } else {
+ low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
+ high = static_cast<uint64_t>(bytes[size - 1]);
+ }
+ } else {
+ if (size > 0) {
+ low = static_cast<uint64_t>(bytes[0]);
+ high = static_cast<uint64_t>(bytes[0]);
+ } else {
+ low = 0;
+ high = 0;
+ }
+ }
+}
+
+// This constant comes from Knuth's PRNG (it empirically works well).
+LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
+// Rotation amount for mixing.
+LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
+
+// Randomly generated values (for now, it uses the same values as in aHash).
+LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
+ {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
+ 0x082efa98ec4e6c89},
+ {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
+ 0x3f84d5b5b5470917},
+};
+
+LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
+ return (x << y) | (x >> ((64 - y) & 63)); // y == 0 must not shift by 64
+}
+
+// This is a portable string hasher. It is not cryptographically secure.
+// The quality of the hash is good enough to pass all tests in SMHasher.
+// The implementation is derived from the generic routine of aHash.
+class HashState {
+ uint64_t buffer;
+ uint64_t pad;
+ uint64_t extra_keys[2];
+ void update(uint64_t low, uint64_t high) {
+ uint64_t combined =
+ folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
+ buffer = (buffer + pad) ^ combined;
+ buffer = rotate_left(buffer, ROTATE);
+ }
+ static uint64_t mix(uint64_t seed) {
+ HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
+ RANDOMNESS[0][3]};
+ mixer.update(seed, 0);
+ return mixer.finish();
+ }
+
+public:
+ constexpr HashState(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+ : buffer(a), pad(b), extra_keys{c, d} {}
+ HashState(uint64_t seed) {
+ // Mix one more round of the seed to make it stronger.
+ uint64_t mixed = mix(seed);
+ buffer = RANDOMNESS[1][0] ^ mixed;
+ pad = RANDOMNESS[1][1] ^ mixed;
+ extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
+ extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
+ }
+ void update(const void *ptr, size_t size) {
+ uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
+ buffer = (buffer + size) * MULTIPLE;
+ uint64_t low, high;
+ if (size > 8) {
+ if (size > 16) {
+ // update tail
+ low = read_little_endian<uint64_t>(&bytes[size - 16]);
+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
+ update(low, high);
+ while (size > 16) {
+ low = read_little_endian<uint64_t>(&bytes[0]);
+ high = read_little_endian<uint64_t>(&bytes[8]);
+ update(low, high);
+ bytes += 16;
+ size -= 16;
+ }
+ } else {
+ low = read_little_endian<uint64_t>(&bytes[0]);
+ high = read_little_endian<uint64_t>(&bytes[size - 8]);
+ update(low, high);
+ }
+ } else {
+ read_small_values(ptr, size, low, high);
+ update(low, high);
+ }
+ }
+ uint64_t finish() {
+ uint64_t rot = buffer & 63;
+ uint64_t folded = folded_multiply(buffer, pad);
+ return rotate_left(folded, rot);
+ }
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC___SUPPORT_HASH_H
diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h
new file mode 100644
index 000000000000000..7206a50c19d7228
--- /dev/null
+++ b/libc/src/__support/memory_size.h
@@ -0,0 +1,72 @@
+//===-- Memory Size ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/limits.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/bit.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+template <class T> LIBC_INLINE static bool mul_overflow(T a, T b, T *res) {
+#if defined(__has_builtin) && __has_builtin(__builtin_mul_overflow)
+ return __builtin_mul_overflow(a, b, res);
+#else
+ T max = cpp::numeric_limits<T>::max();
+ T min = cpp::numeric_limits<T>::min();
+ bool overflow = (b > 0 && (a > max / b || a < min / b)) ||
+ (b < 0 && (a < max / b || a > min / b));
+ if (!overflow)
+ *res = a * b;
+ return overflow;
+#endif
+}
+// A memory size capped at the max of ssize_t; a negative value marks invalid.
+class SafeMemSize {
+private:
+ using type = cpp::make_signed_t<size_t>;
+ type value;
+ explicit SafeMemSize(type value) : value(value) {}
+
+public:
+ static constexpr size_t MAX_MEM_SIZE =
+ static_cast<size_t>(cpp::numeric_limits<type>::max());
+ explicit SafeMemSize(size_t value)
+ : value(value <= MAX_MEM_SIZE ? static_cast<type>(value) : -1) {}
+ operator size_t() { return static_cast<size_t>(value); }
+ bool valid() { return value >= 0; }
+ SafeMemSize operator+(const SafeMemSize &other) {
+ // Invalid operands or a sum above MAX_MEM_SIZE yield the invalid size -1.
+ type result = -1, limit = static_cast<type>(MAX_MEM_SIZE);
+ if ((value | other.value) >= 0 && other.value <= limit - value)
+ result = value + other.value;
+ return SafeMemSize{result};
+ }
+ SafeMemSize operator*(const SafeMemSize &other) {
+ // An invalid operand poisons the product: (-1) * (-1) and (-1) * 0 do not
+ // overflow, so the overflow check alone would report a valid size.
+ type result = -1;
+ if ((value | other.value) < 0 || mul_overflow(value, other.value, &result))
+ result = -1;
+ return SafeMemSize{result};
+ }
+ SafeMemSize align_up(size_t alignment) {
+ if (!is_power_of_two(alignment) || alignment > MAX_MEM_SIZE || !valid())
+ return SafeMemSize{type{-1}};
+
+ type offset = LIBC_NAMESPACE::offset_to<size_t>(value, alignment);
+
+ if (LIBC_UNLIKELY(offset > static_cast<type>(MAX_MEM_SIZE) - value))
+ return SafeMemSize{type{-1}};
+
+ return SafeMemSize{value + offset};
+ }
+};
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/CMakeLists.txt b/libc/src/search/CMakeLists.txt
new file mode 100644
index 000000000000000..a4252fa77fa5a9a
--- /dev/null
+++ b/libc/src/search/CMakeLists.txt
@@ -0,0 +1,77 @@
+add_subdirectory(hsearch)
+
+add_entrypoint_object(
+ hcreate
+ SRCS
+ hcreate.cpp
+ HDRS
+ hcreate.h
+ DEPENDS
+ libc.src.search.hsearch.global
+ libc.src.__support.HashTable.table
+ libc.src.__support.HashTable.randomness
+ libc.src.errno.errno
+ libc.include.search
+)
+
+add_entrypoint_object(
+ hcreate_r
+ SRCS
+ hcreate_r.cpp
+ HDRS
+ hcreate_r.h
+ DEPENDS
+ libc.src.__support.HashTable.table
+ libc.src.__support.HashTable.randomness
+ libc.src.errno.errno
+ libc.include.search
+)
+
+add_entrypoint_object(
+ hsearch
+ SRCS
+ hsearch.cpp
+ HDRS
+ hsearch.h
+ DEPENDS
+ libc.src.search.hsearch.global
+ libc.src.__support.HashTable.table
+ libc.src.errno.errno
+ libc.include.search
+)
+
+add_entrypoint_object(
+ hsearch_r
+ SRCS
+ hsearch_r.cpp
+ HDRS
+ hsearch_r.h
+ DEPENDS
+ libc.src.__support.HashTable.table
+ libc.src.errno.errno
+ libc.include.search
+)
+
+add_entrypoint_object(
+ hdestroy
+ SRCS
+ hdestroy.cpp
+ HDRS
+ hdestroy.h
+ DEPENDS
+ libc.src.search.hsearch.global
+ libc.src.__support.HashTable.table
+ libc.include.search
+)
+
+add_entrypoint_object(
+ hdestroy_r
+ SRCS
+ hdestroy_r.cpp
+ HDRS
+ hdestroy_r.h
+ DEPENDS
+ libc.src.errno.errno
+ libc.src.__support.HashTable.table
+ libc.include.search
+)
diff --git a/libc/src/search/hcreate.cpp b/libc/src/search/hcreate.cpp
new file mode 100644
index 000000000000000..9c05e317a2d05f3
--- /dev/null
+++ b/libc/src/search/hcreate.cpp
@@ -0,0 +1,28 @@
+//===-- Implementation of hcreate -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hcreate.h"
+#include "src/__support/HashTable/randomness.h"
+#include "src/__support/HashTable/table.h"
+#include "src/errno/libc_errno.h"
+#include "src/search/hsearch/global.h"
+
+namespace LIBC_NAMESPACE {
+LLVM_LIBC_FUNCTION(int, hcreate, (size_t capacity)) {
+ uint64_t randomness = internal::randomness::next_random_seed();
+ internal::HashTable *table =
+ internal::HashTable::allocate(capacity, randomness);
+ if (table == nullptr) {
+ libc_errno = ENOMEM;
+ return 0;
+ }
+ internal::global_hash_table = table;
+ return 1;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hcreate.h b/libc/src/search/hcreate.h
new file mode 100644
index 000000000000000..2ac37fb030c26f2
--- /dev/null
+++ b/libc/src/search/hcreate.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for hcreate -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HCREATE_H
+#define LLVM_LIBC_SRC_SEARCH_HCREATE_H
+
+#include <search.h>
+
+namespace LIBC_NAMESPACE {
+int hcreate(size_t capacity);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HCREATE_H
diff --git a/libc/src/search/hcreate_r.cpp b/libc/src/search/hcreate_r.cpp
new file mode 100644
index 000000000000000..612a45cd0c688b9
--- /dev/null
+++ b/libc/src/search/hcreate_r.cpp
@@ -0,0 +1,32 @@
+//===-- Implementation of hcreate_r -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hcreate_r.h"
+#include "src/__support/HashTable/randomness.h"
+#include "src/__support/HashTable/table.h"
+#include "src/errno/libc_errno.h"
+
+namespace LIBC_NAMESPACE {
+LLVM_LIBC_FUNCTION(int, hcreate_r,
+ (size_t capacity, struct hsearch_data *htab)) {
+ if (htab == nullptr) {
+ libc_errno = EINVAL;
+ return 0;
+ }
+ uint64_t randomness = internal::randomness::next_random_seed();
+ internal::HashTable *table =
+ internal::HashTable::allocate(capacity, randomness);
+ if (table == nullptr) {
+ libc_errno = ENOMEM;
+ return 0;
+ }
+ htab->__opaque = table;
+ return 1;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hcreate_r.h b/libc/src/search/hcreate_r.h
new file mode 100644
index 000000000000000..e81895ef815c9fb
--- /dev/null
+++ b/libc/src/search/hcreate_r.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for hcreate_r ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HCREATE_R_H
+#define LLVM_LIBC_SRC_SEARCH_HCREATE_R_H
+
+#include <search.h>
+
+namespace LIBC_NAMESPACE {
+int hcreate_r(size_t capacity, struct hsearch_data *htab);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HCREATE_R_H
diff --git a/libc/src/search/hdestroy.cpp b/libc/src/search/hdestroy.cpp
new file mode 100644
index 000000000000000..1981d371a9fb755
--- /dev/null
+++ b/libc/src/search/hdestroy.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of hdestroy ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hdestroy.h"
+#include "src/__support/HashTable/table.h"
+#include "src/search/hsearch/global.h"
+
+namespace LIBC_NAMESPACE {
+LLVM_LIBC_FUNCTION(void, hdestroy, (void)) {
+ using namespace internal;
+ HashTable::deallocate(global_hash_table);
+ global_hash_table = nullptr;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hdestroy.h b/libc/src/search/hdestroy.h
new file mode 100644
index 000000000000000..b81e309a6bc8093
--- /dev/null
+++ b/libc/src/search/hdestroy.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for hdestroy -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HDESTROY_H
+#define LLVM_LIBC_SRC_SEARCH_HDESTROY_H
+
+#include <search.h>
+
+namespace LIBC_NAMESPACE {
+void hdestroy(void);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HDESTROY_H
diff --git a/libc/src/search/hdestroy_r.cpp b/libc/src/search/hdestroy_r.cpp
new file mode 100644
index 000000000000000..f0dd872d874a77e
--- /dev/null
+++ b/libc/src/search/hdestroy_r.cpp
@@ -0,0 +1,25 @@
+//===-- Implementation of hdestroy_r ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hdestroy_r.h"
+#include "src/__support/HashTable/table.h"
+#include "src/errno/libc_errno.h"
+
+namespace LIBC_NAMESPACE {
+LLVM_LIBC_FUNCTION(void, hdestroy_r, (struct hsearch_data * htab)) {
+ using namespace internal;
+ if (htab == nullptr) {
+ libc_errno = EINVAL;
+ return;
+ }
+ HashTable *table = static_cast<HashTable *>(htab->__opaque);
+ HashTable::deallocate(table);
+ htab->__opaque = nullptr;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hdestroy_r.h b/libc/src/search/hdestroy_r.h
new file mode 100644
index 000000000000000..503af417944488f
--- /dev/null
+++ b/libc/src/search/hdestroy_r.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for hdestroy_r ---------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H
+#define LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H
+
+#include <search.h>
+
+namespace LIBC_NAMESPACE {
+void hdestroy_r(struct hsearch_data *htab);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HDESTROY_R_H
diff --git a/libc/src/search/hsearch.cpp b/libc/src/search/hsearch.cpp
new file mode 100644
index 000000000000000..ee5eee27f33a477
--- /dev/null
+++ b/libc/src/search/hsearch.cpp
@@ -0,0 +1,35 @@
+//===-- Implementation of hsearch -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hsearch.h"
+#include "src/__support/HashTable/table.h"
+#include "src/errno/libc_errno.h"
+#include "src/search/hsearch/global.h"
+
+namespace LIBC_NAMESPACE {
+// Looks up (FIND) or inserts (ENTER) item in the global table. Returns the
+// matching entry, or null with errno set to ESRCH (FIND) or ENOMEM (ENTER).
+LLVM_LIBC_FUNCTION(ENTRY *, hsearch, (ENTRY item, ACTION action)) {
+ // Null unless a case below assigns it, so an unknown action returns null.
+ ENTRY *result = nullptr;
+ switch (action) {
+ case FIND:
+ result = internal::global_hash_table->find(item.key);
+ if (result == nullptr)
+ libc_errno = ESRCH;
+ break;
+ case ENTER:
+ result = internal::global_hash_table->insert(item);
+ if (result == nullptr)
+ libc_errno = ENOMEM;
+ break;
+ }
+ return result;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hsearch.h b/libc/src/search/hsearch.h
new file mode 100644
index 000000000000000..32dc073a49b8343
--- /dev/null
+++ b/libc/src/search/hsearch.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for hsearch -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HSEARCH_H
+#define LLVM_LIBC_SRC_SEARCH_HSEARCH_H
+
+#include <search.h> // ENTRY, ACTION
+
+namespace LIBC_NAMESPACE {
+ENTRY *hsearch(ENTRY item, ACTION action);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HSEARCH_H
diff --git a/libc/src/search/hsearch/CMakeLists.txt b/libc/src/search/hsearch/CMakeLists.txt
new file mode 100644
index 000000000000000..17289f03d0628f8
--- /dev/null
+++ b/libc/src/search/hsearch/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_object_library(
+ global
+ SRCS
+ global.cpp
+ HDRS
+ global.h
+)
diff --git a/libc/src/search/hsearch/global.cpp b/libc/src/search/hsearch/global.cpp
new file mode 100644
index 000000000000000..b6782ada50de455
--- /dev/null
+++ b/libc/src/search/hsearch/global.cpp
@@ -0,0 +1,13 @@
+//===-- Global hashtable implementation -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+struct HashTable *global_hash_table = nullptr;
+}
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hsearch/global.h b/libc/src/search/hsearch/global.h
new file mode 100644
index 000000000000000..292008cb0c8075a
--- /dev/null
+++ b/libc/src/search/hsearch/global.h
@@ -0,0 +1,13 @@
+//===-- Global hashtable header -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+extern struct HashTable *global_hash_table;
+}
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hsearch_r.cpp b/libc/src/search/hsearch_r.cpp
new file mode 100644
index 000000000000000..958fba7c00d0d46
--- /dev/null
+++ b/libc/src/search/hsearch_r.cpp
@@ -0,0 +1,42 @@
+//===-- Implementation of hsearch_r -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/search/hsearch_r.h"
+#include "src/__support/HashTable/table.h"
+#include "src/errno/libc_errno.h"
+
+namespace LIBC_NAMESPACE {
+LLVM_LIBC_FUNCTION(int, hsearch_r,
+ (ENTRY item, ACTION action, ENTRY **retval,
+ struct hsearch_data *htab)) {
+ if (htab == nullptr) {
+ libc_errno = EINVAL;
+ return 0;
+ }
+ internal::HashTable *table =
+ static_cast<internal::HashTable *>(htab->__opaque);
+ switch (action) {
+ case FIND:
+ *retval = table->find(item.key);
+ if (*retval == nullptr) {
+ libc_errno = ESRCH;
+ return 0;
+ }
+ break;
+ case ENTER:
+ *retval = table->insert(item);
+ if (*retval == nullptr) {
+ libc_errno = ENOMEM;
+ return 0;
+ }
+ break;
+ }
+ return 1;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hsearch_r.h b/libc/src/search/hsearch_r.h
new file mode 100644
index 000000000000000..d36094c2eba585d
--- /dev/null
+++ b/libc/src/search/hsearch_r.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for hsearch_r ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H
+#define LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H
+
+#include <search.h> // ENTRY, ACTION
+
+namespace LIBC_NAMESPACE {
+int hsearch_r(ENTRY item, ACTION action, ENTRY **retval,
+ struct hsearch_data *htab);
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SEARCH_HSEARCH_R_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index 52452cd1037dbfb..c45b94f364397e3 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -46,6 +46,7 @@ add_subdirectory(stdlib)
add_subdirectory(inttypes)
add_subdirectory(stdio)
add_subdirectory(wchar)
+add_subdirectory(search)
if(${LIBC_TARGET_OS} STREQUAL "linux")
add_subdirectory(fcntl)
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 2920535fbaa56e7..8b323adfedb21b0 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -126,6 +126,29 @@ add_libc_test(
libc.src.__support.char_vector
)
+add_libc_test(
+ hash_test
+ SUITE
+ libc-support-tests
+ SRCS
+ hash_test.cpp
+ DEPENDS
+ libc.src.__support.hash
+ libc.include.stdlib
+ UNIT_TEST_ONLY
+ # Aligned Allocation is not supported in hermetic builds.
+)
+
+add_libc_test(
+ memory_size_test
+ SUITE
+ libc-support-tests
+ SRCS
+ memory_size_test.cpp
+ DEPENDS
+ libc.src.__support.memory_size
+)
+
add_executable(
libc_str_to_float_comparison_test
str_to_float_comparison_test.cpp
@@ -155,3 +178,4 @@ add_subdirectory(File)
add_subdirectory(RPC)
add_subdirectory(OSUtil)
add_subdirectory(FPUtil)
+add_subdirectory(HashTable)
diff --git a/libc/test/src/__support/HashTable/CMakeLists.txt b/libc/test/src/__support/HashTable/CMakeLists.txt
new file mode 100644
index 000000000000000..d1364f35840c2be
--- /dev/null
+++ b/libc/test/src/__support/HashTable/CMakeLists.txt
@@ -0,0 +1,67 @@
+add_libc_test(
+ bitmask_test
+ SUITE
+ libc-support-tests
+ SRCS
+ bitmask_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.bitmask
+ libc.include.stdlib
+)
+add_libc_test(
+ table_test
+ SUITE
+ libc-support-tests
+ SRCS
+ table_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.randomness
+ libc.src.__support.HashTable.table
+ libc.src.__support.common
+ UNIT_TEST_ONLY
+)
+cpu_supports(CPU_SUPPORTS_SSE2 "SSE2")
+if (CPU_SUPPORTS_SSE2)
+ add_libc_test(
+ group_test_sse2
+ SUITE
+ libc-support-tests
+ SRCS
+ group_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.bitmask
+ libc.include.stdlib
+ libc.src.string.memcmp
+ COMPILE_OPTIONS
+ ${LIBC_COMPILE_OPTIONS_NATIVE}
+ UNIT_TEST_ONLY
+ )
+ add_libc_test(
+ group_test
+ SUITE
+ libc-support-tests
+ SRCS
+ group_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.bitmask
+ libc.include.stdlib
+ libc.src.string.memcmp
+ COMPILE_OPTIONS
+ -mno-sse2 -mtune=generic
+ UNIT_TEST_ONLY
+ )
+else()
+ add_libc_test(
+ group_test
+ SUITE
+ libc-support-tests
+ SRCS
+ group_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.bitmask
+ libc.include.stdlib
+ libc.src.string.memcmp
+ UNIT_TEST_ONLY
+ )
+ message(STATUS "Skipping test for libc.test.src.__support.HashTable.group_test_sse2: insufficient host cpu features 'SSE2'")
+endif()
diff --git a/libc/test/src/__support/HashTable/bitmask_test.cpp b/libc/test/src/__support/HashTable/bitmask_test.cpp
new file mode 100644
index 000000000000000..c816c5d10638897
--- /dev/null
+++ b/libc/test/src/__support/HashTable/bitmask_test.cpp
@@ -0,0 +1,69 @@
+//===-- Unittests for bitmask ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/HashTable/bitmask.h"
+#include "test/UnitTest/Test.h"
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+using ShortBitMask = BitMaskAdaptor<uint16_t, 0xffff, 1>;
+using LargeBitMask = BitMaskAdaptor<uint64_t, 0x80'80'80'80'80'80'80'80, 8>;
+
+TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideLowestSetBit) {
+ uint16_t data = 0xffff;
+ for (size_t i = 0; i < 16; ++i) {
+ if (ShortBitMask{data}.any_bit_set()) {
+ ASSERT_EQ(ShortBitMask{data}.lowest_set_bit_nonzero(), i);
+ data <<= 1;
+ }
+ }
+}
+
+TEST(LlvmLibcHashTableBitMaskTest, MultiBitStrideLowestSetBit) {
+ uint64_t data = 0xffff'ffff'ffff'ffff;
+ for (size_t i = 0; i < 8; ++i) {
+ for (size_t j = 0; j < 8; ++j) {
+ if (LargeBitMask{data}.any_bit_set()) {
+ ASSERT_EQ(LargeBitMask{data}.lowest_set_bit_nonzero(), i);
+ data <<= 1;
+ }
+ }
+ }
+}
+
+TEST(LlvmLibcHashTableBitMaskTest, SingleBitStrideIteration) {
+ using Iter = IteratableBitMaskAdaptor<ShortBitMask>;
+ uint16_t data = 0xffff;
+ for (size_t i = 0; i < 16; ++i) {
+ Iter iter = {data};
+ size_t j = i;
+ for (auto x : iter) {
+ ASSERT_EQ(x, j);
+ j++;
+ }
+ ASSERT_EQ(j, size_t{16});
+ data <<= 1;
+ }
+}
+
+TEST(LlvmLibcHashTableBitMaskTest, MultiBitStrideIteration) {
+ using Iter = IteratableBitMaskAdaptor<LargeBitMask>;
+ uint64_t data = Iter::MASK;
+ for (size_t i = 0; i < 8; ++i) {
+ Iter iter = {data};
+ size_t j = i;
+ for (auto x : iter) {
+ ASSERT_EQ(x, j);
+ j++;
+ }
+ ASSERT_EQ(j, size_t{8});
+ data <<= Iter::STRIDE;
+ }
+}
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/HashTable/group_test.cpp b/libc/test/src/__support/HashTable/group_test.cpp
new file mode 100644
index 000000000000000..810fff093b621ac
--- /dev/null
+++ b/libc/test/src/__support/HashTable/group_test.cpp
@@ -0,0 +1,91 @@
+//===-- Unittests for control group ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/HashTable/bitmask.h"
+
+#include "src/string/memcmp.h"
+#include "test/UnitTest/Test.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+
+struct ByteArray {
+ alignas(Group) uint8_t data[sizeof(Group) + 1]{};
+};
+
+TEST(LlvmLibcHashTableBitMaskTest, Match) {
+ // Every pair of targets differs in more than just the lowest bit, so
+ // match_byte is expected to report no false positives.
+ uint8_t targets[4] = {0x00, 0x11, 0xFF, 0x0F};
+ size_t count[4] = {0, 0, 0, 0};
+ size_t appearance[4][sizeof(Group)];
+ ByteArray array{};
+
+ union {
+ uintptr_t random;
+ int data[sizeof(uintptr_t) / sizeof(int)];
+ };
+
+ for (int &i : data)
+ i = rand();
+
+ for (size_t i = 0; i < sizeof(Group); ++i) {
+ size_t choice = random % 4;
+ random /= 4;
+ array.data[i] = targets[choice];
+ appearance[choice][count[choice]++] = i;
+ }
+
+ for (size_t t = 0; t < sizeof(targets); ++t) {
+ auto bitmask = Group::load(array.data).match_byte(targets[t]);
+ for (size_t i = 0; i < count[t]; ++i) {
+ size_t iterated = 0;
+ for (size_t position : bitmask) {
+ ASSERT_EQ(appearance[t][iterated], position);
+ iterated++;
+ }
+ ASSERT_EQ(count[t], iterated);
+ }
+ }
+}
+
+TEST(LlvmLibcHashTableBitMaskTest, MaskAvailable) {
+ uint8_t values[3] = {0x00, 0x0F, 0x80};
+
+ for (size_t i = 0; i < sizeof(Group); ++i) {
+ ByteArray array{};
+
+ union {
+ uintptr_t random;
+ int data[sizeof(uintptr_t) / sizeof(int)];
+ };
+
+ for (int &j : data)
+ j = rand();
+
+ ASSERT_FALSE(Group::load(array.data).mask_available().any_bit_set());
+
+ array.data[i] = 0x80;
+ for (size_t j = 0; j < sizeof(Group); ++j) {
+ if (i == j)
+ continue;
+ size_t sample_space = 2 + (j > i);
+ size_t choice = random % sample_space;
+ random /= sizeof(values);
+ array.data[j] = values[choice];
+ }
+
+ auto mask = Group::load(array.data).mask_available();
+ ASSERT_TRUE(mask.any_bit_set());
+ ASSERT_EQ(mask.lowest_set_bit_nonzero(), i);
+ }
+}
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/HashTable/table_test.cpp b/libc/test/src/__support/HashTable/table_test.cpp
new file mode 100644
index 000000000000000..f0aa82f2d5c768b
--- /dev/null
+++ b/libc/test/src/__support/HashTable/table_test.cpp
@@ -0,0 +1,77 @@
+//===-- Unittests for table -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/HashTable/randomness.h"
+#include "src/__support/HashTable/table.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+TEST(LlvmLibcTableTest, AllocationAndDeallocation) {
+ size_t caps[] = {0, 1, 2, 3, 4, 7, 11, 37, 1024, 5261, 19999};
+ const char *keys[] = {"", "a", "ab", "abc",
+ "abcd", "abcde", "abcdef", "abcdefg",
+ "abcdefgh", "abcdefghi", "abcdefghij"};
+ for (size_t i : caps) {
+ HashTable *table = HashTable::allocate(i, 1);
+ ASSERT_NE(table, static_cast<HashTable *>(nullptr));
+ for (const char *key : keys) {
+ ASSERT_EQ(table->find(key), static_cast<ENTRY *>(nullptr));
+ }
+ HashTable::deallocate(table);
+ }
+ ASSERT_EQ(HashTable::allocate(-1, 0), static_cast<HashTable *>(nullptr));
+ HashTable::deallocate(nullptr);
+}
+
+TEST(LlvmLibcTableTest, Insertion) {
+ union key {
+ uint64_t value;
+ char bytes[8];
+ } keys[256];
+ for (size_t k = 0; k < 256; ++k) {
+ keys[k].value = LIBC_NAMESPACE::Endian::to_little_endian(k);
+ }
+ constexpr size_t CAP = next_power_of_two((sizeof(Group) + 1) * 8 / 7) / 8 * 7;
+ static_assert(CAP + 1 < 256, "CAP is too large for this test.");
+ HashTable *table =
+ HashTable::allocate(sizeof(Group) + 1, randomness::next_random_seed());
+ ASSERT_NE(table, static_cast<HashTable *>(nullptr));
+
+ // insert to full capacity.
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_NE(table->insert({keys[i].bytes, keys[i].bytes}),
+ static_cast<ENTRY *>(nullptr));
+ }
+
+ // one more insert should fail.
+ ASSERT_EQ(table->insert({keys[CAP + 1].bytes, keys[CAP + 1].bytes}),
+ static_cast<ENTRY *>(nullptr));
+
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(strcmp(table->find(keys[i].bytes)->key, keys[i].bytes), 0);
+ }
+ for (size_t i = CAP; i < 256; ++i) {
+ ASSERT_EQ(table->find(keys[i].bytes), static_cast<ENTRY *>(nullptr));
+ }
+
+ // do not replace old value
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_NE(table->insert({keys[i].bytes, reinterpret_cast<void *>(i)}),
+ static_cast<ENTRY *>(nullptr));
+ }
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(table->find(keys[i].bytes)->data,
+ reinterpret_cast<void *>(keys[i].bytes));
+ }
+
+ HashTable::deallocate(table);
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/bit_test.cpp b/libc/test/src/__support/bit_test.cpp
index 09d9c2f0a4ed88a..c25aef76e0266e1 100644
--- a/libc/test/src/__support/bit_test.cpp
+++ b/libc/test/src/__support/bit_test.cpp
@@ -15,4 +15,21 @@ TEST(LlvmLibcBlockBitTest, TODO) {
// TODO Implement me.
}
+TEST(LlvmLibcBlockBitTest, NextPowerOfTwo) {
+ ASSERT_EQ(1u, next_power_of_two(0u));
+ for (unsigned int i = 0; i < 31; ++i) {
+ ASSERT_EQ(1u << (i + 1), next_power_of_two((1u << i) + 1));
+ ASSERT_EQ(1u << i, next_power_of_two(1u << i));
+ }
+}
+
+TEST(LlvmLibcBlockBitTest, IsPowerOfTwo) {
+ ASSERT_FALSE(is_power_of_two(0u));
+ ASSERT_TRUE(is_power_of_two(1u));
+ for (unsigned int i = 1; i < 31; ++i) {
+ ASSERT_TRUE(is_power_of_two(1u << i));
+ ASSERT_FALSE(is_power_of_two((1u << i) + 1));
+ }
+}
+
} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/hash_test.cpp b/libc/test/src/__support/hash_test.cpp
new file mode 100644
index 000000000000000..1b86d21a1fd4913
--- /dev/null
+++ b/libc/test/src/__support/hash_test.cpp
@@ -0,0 +1,135 @@
+//===-- Unittests for hash ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/hash.h"
+#include "test/UnitTest/Test.h"
+#include <cstdint>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+template <class T> struct AlignedMemory { // buffer handed out at a chosen misalignment
+ T *data; // first usable element; sits `offset` elements past the allocation start
+ size_t offset; // elements `data` was advanced by, already reduced modulo alignment
+ AlignedMemory(size_t size, size_t alignment, size_t offset) : offset(offset % alignment) {
+ size_t sz = size * sizeof(T);
+ size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment * sizeof(T); // slack for the shift
+ data = static_cast<T *>(::aligned_alloc(alignment, aligned));
+ data += offset % alignment; // `offset` here is the constructor parameter, not the member
+ }
+ ~AlignedMemory() { ::free(data - offset); } // undo exactly the shift applied in the constructor
+};
+
+size_t sizes[] = {0, 1, 23, 59, 1024, 5261};
+char values[] = {0, 1, 23, 59, 102, -1};
+
+// Hash value should not change with different alignments.
+TEST(LlvmLibcHashTest, SanityCheck) {
+ for (size_t sz : sizes) {
+ for (uint8_t val : values) {
+ uint64_t hash;
+ {
+ AlignedMemory<char> mem(sz, 64, 0);
+ ::memset(mem.data, val, sz);
+ LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
+ state.update(mem.data, sz);
+ hash = state.finish();
+ }
+ for (size_t offset = 1; offset < 64; ++offset) {
+ AlignedMemory<char> mem(sz, 64, offset);
+ ::memset(mem.data, val, sz);
+ LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
+ state.update(mem.data, sz);
+ ASSERT_EQ(hash, state.finish());
+ }
+ }
+ }
+}
+
+static inline size_t popcnt(uint64_t x) {
+ size_t count = 0;
+ while (x) {
+ count += x & 1;
+ x >>= 1;
+ }
+ return count;
+}
+
+// Mutate a single bit in a rather large input. The hash should change
+// significantly. At least one fifth of the bits should not match.
+TEST(LlvmLibcHashTest, Avalanche) {
+ for (size_t sz : sizes) {
+ for (uint8_t val : values) {
+ uint64_t hash;
+ AlignedMemory<char> mem(sz, 64, 0);
+ ::memset(mem.data, val, sz);
+ {
+ LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
+ state.update(mem.data, sz);
+ hash = state.finish();
+ }
+ for (size_t i = 0; i < sz; ++i) {
+ for (size_t j = 0; j < 8; ++j) {
+ uint8_t mask = 1 << j;
+ mem.data[i] ^= mask;
+ {
+ LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
+ state.update(mem.data, sz);
+ uint64_t new_hash = state.finish();
+ ASSERT_GE(popcnt(hash ^ new_hash), size_t{13});
+ }
+ mem.data[i] ^= mask;
+ }
+ }
+ }
+ }
+}
+
+// Hash a random sequence of input. The LSB should be uniform enough such that
+// values spread across the entire range.
+TEST(LlvmLibcHashTest, UniformLSB) {
+ srand(0);
+ for (size_t sz : sizes) {
+ AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
+ ::memset(counters.data, 0, sz * sizeof(size_t));
+ for (size_t i = 0; i < 200 * sz; ++i) {
+ int randomness[8] = {rand(), rand(), rand(), rand(),
+ rand(), rand(), rand(), rand()};
+ {
+ LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b};
+ state.update(randomness, sizeof(randomness));
+ uint64_t hash = state.finish();
+ counters.data[hash % sz]++;
+ }
+ }
+ for (size_t i = 0; i < sz; ++i) {
+ ASSERT_GE(counters.data[i], size_t{140});
+ ASSERT_LE(counters.data[i], size_t{260});
+ }
+ }
+}
+
+// Hash a low entropy sequence. The MSB should be uniform enough such that
+// there is no significant bias even if the value range is small.
+// The top 7 bits are examined because they will be used as a secondary key
+// in the hash table.
+TEST(LlvmLibcHashTest, UniformMSB) {
+ size_t sz = 1 << 7;
+ AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
+ ::memset(counters.data, 0, sz * sizeof(size_t));
+ for (size_t i = 0; i < 200 * sz; ++i) {
+ LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8};
+ state.update(&i, sizeof(i));
+ uint64_t hash = state.finish();
+ counters.data[hash >> 57]++;
+ }
+ for (size_t i = 0; i < sz; ++i) {
+ ASSERT_GE(counters.data[i], size_t{140});
+ ASSERT_LE(counters.data[i], size_t{260});
+ }
+}
diff --git a/libc/test/src/__support/memory_size_test.cpp b/libc/test/src/__support/memory_size_test.cpp
new file mode 100644
index 000000000000000..486436eff267acf
--- /dev/null
+++ b/libc/test/src/__support/memory_size_test.cpp
@@ -0,0 +1,86 @@
+//===-- Unittests for MemorySize ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/memory_size.h"
+#include "test/UnitTest/Test.h"
+#include <cstddef>
+
+namespace LIBC_NAMESPACE {
+namespace internal {
+static inline constexpr size_t SAFE_MEM_SIZE_TEST_LIMIT =
+ static_cast<size_t>(cpp::numeric_limits<cpp::make_signed_t<size_t>>::max());
+
+TEST(LlvmLibcMemSizeTest, Construction) { // sizes above the test limit must be invalid
+ ASSERT_FALSE(SafeMemSize{static_cast<size_t>(-1)}.valid());
+ ASSERT_FALSE(SafeMemSize{static_cast<size_t>(-2)}.valid());
+ ASSERT_FALSE(SafeMemSize{static_cast<size_t>(-1024 + 33)}.valid());
+ ASSERT_FALSE(SafeMemSize{static_cast<size_t>(-1024 + 66)}.valid());
+ ASSERT_FALSE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT + 1}.valid());
+ ASSERT_FALSE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT + 13}.valid());
+ // Sizes at or below SAFE_MEM_SIZE_TEST_LIMIT are expected to be valid.
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(1)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(1024 + 13)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(2048 - 13)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(4096 + 1)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(8192 - 1)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(16384 + 15)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(32768 * 3)}.valid());
+ ASSERT_TRUE(SafeMemSize{static_cast<size_t>(65536 * 13)}.valid());
+ ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT}.valid());
+ ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT - 1}.valid());
+ ASSERT_TRUE(SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT - 13}.valid());
+}
+
+TEST(LlvmLibcMemSizeTest, Addition) {
+ auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT};
+ auto half = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 2};
+ auto third = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 3};
+
+ ASSERT_TRUE(half.valid());
+ ASSERT_TRUE(third.valid());
+ ASSERT_TRUE((half + half).valid());
+ ASSERT_TRUE((third + third + third).valid());
+ ASSERT_TRUE((half + third).valid());
+
+ ASSERT_FALSE((max + SafeMemSize{static_cast<size_t>(1)}).valid());
+ ASSERT_FALSE((third + third + third + third).valid());
+ ASSERT_FALSE((half + half + half).valid());
+}
+
+TEST(LlvmLibcMemSizeTest, Multiplication) {
+ auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT};
+ auto half = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 2};
+ auto third = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT / 3};
+
+ ASSERT_TRUE((max * SafeMemSize{static_cast<size_t>(1)}).valid());
+ ASSERT_TRUE((max * SafeMemSize{static_cast<size_t>(0)}).valid());
+
+ ASSERT_FALSE((max * SafeMemSize{static_cast<size_t>(2)}).valid());
+ ASSERT_FALSE((half * half).valid());
+ ASSERT_FALSE((half * SafeMemSize{static_cast<size_t>(3)}).valid());
+ ASSERT_FALSE((third * SafeMemSize{static_cast<size_t>(4)}).valid());
+}
+
+TEST(LlvmLibcMemSizeTest, AlignUp) {
+ size_t sizes[] = {
+ 0, 1, 8, 13, 60, 97, 128, 1024, 5124, 5120,
+ };
+ for (size_t i = 2; i <= 16; ++i) {
+ size_t alignment = 1 << i;
+ for (size_t size : sizes) {
+ auto safe_size = SafeMemSize{size};
+ auto safe_aligned_size = safe_size.align_up(alignment);
+ ASSERT_TRUE(safe_aligned_size.valid());
+ ASSERT_EQ(static_cast<size_t>(safe_aligned_size) % alignment, size_t{0});
+ }
+ }
+ auto max = SafeMemSize{SAFE_MEM_SIZE_TEST_LIMIT};
+ ASSERT_FALSE(max.align_up(8).valid());
+}
+} // namespace internal
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/search/CMakeLists.txt b/libc/test/src/search/CMakeLists.txt
new file mode 100644
index 000000000000000..d624f1443094900
--- /dev/null
+++ b/libc/test/src/search/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_custom_target(libc_search_unittests)
+add_libc_unittest(
+ hsearch_test
+ SUITE
+ libc_search_unittests
+ SRCS
+ hsearch_test.cpp
+ DEPENDS
+ libc.src.search.hsearch_r
+ libc.src.search.hcreate_r
+ libc.src.search.hdestroy_r
+ libc.src.search.hsearch
+ libc.src.search.hcreate
+ libc.src.search.hdestroy
+ libc.src.errno.errno
+)
diff --git a/libc/test/src/search/hsearch_test.cpp b/libc/test/src/search/hsearch_test.cpp
new file mode 100644
index 000000000000000..bc9dea748758ac0
--- /dev/null
+++ b/libc/test/src/search/hsearch_test.cpp
@@ -0,0 +1,124 @@
+//===-- Unittests for hsearch ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/HashTable/table.h"
+#include "src/__support/bit.h"
+#include "src/search/hcreate.h"
+#include "src/search/hcreate_r.h"
+#include "src/search/hdestroy.h"
+#include "src/search/hdestroy_r.h"
+#include "src/search/hsearch.h"
+#include "test/UnitTest/ErrnoSetterMatcher.h"
+#include "test/UnitTest/Test.h"
+#include <asm-generic/errno-base.h>
+
+TEST(LlvmLibcHSearchTest, CreateTooLarge) { // both creators must fail with ENOMEM
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ struct hsearch_data hdata;
+ ASSERT_THAT(LIBC_NAMESPACE::hcreate(-1), Fails(ENOMEM, 0)); // -1: oversized request
+ ASSERT_THAT(LIBC_NAMESPACE::hcreate_r(-1, &hdata), Fails(ENOMEM, 0));
+}
+
+TEST(LlvmLibcHSearchTest, CreateInvalid) {
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ ASSERT_THAT(LIBC_NAMESPACE::hcreate_r(16, nullptr), Fails(EINVAL, 0));
+}
+
+TEST(LlvmLibcHSearchTest, CreateValid) {
+ struct hsearch_data hdata;
+ ASSERT_GT(LIBC_NAMESPACE::hcreate_r(1, &hdata), 0);
+ LIBC_NAMESPACE::hdestroy_r(&hdata);
+
+ ASSERT_GT(LIBC_NAMESPACE::hcreate(1), 0);
+ LIBC_NAMESPACE::hdestroy();
+}
+
+char search_data[] = "1234567890abcdefghijklmnopqrstuvwxyz"
+ "1234567890abcdefghijklmnopqrstuvwxyz"
+ "1234567890abcdefghijklmnopqrstuvwxyz"
+ "1234567890abcdefghijklmnopqrstuvwxyz"
+ "1234567890abcdefghijklmnopqrstuvwxyz";
+char search_data2[] =
+ "@@@@@@@@@@@@@@!!!!!!!!!!!!!!!!!###########$$$$$$$$$$^^^^^^&&&&&&&&";
+
+constexpr size_t GROUP_SIZE = sizeof(LIBC_NAMESPACE::internal::Group);
+constexpr size_t CAP =
+ LIBC_NAMESPACE::next_power_of_two((GROUP_SIZE + 1) * 8 / 7) / 8 * 7;
+static_assert(CAP < sizeof(search_data), "CAP too large");
+
+TEST(LlvmLibcHSearchTest, InsertTooMany) {
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0);
+
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, ENTER)->key,
+ &search_data[i]);
+ }
+ ASSERT_THAT(static_cast<void *>(
+ LIBC_NAMESPACE::hsearch({search_data2, nullptr}, ENTER)),
+ Fails(ENOMEM, static_cast<void *>(nullptr)));
+ LIBC_NAMESPACE::hdestroy();
+}
+
+TEST(LlvmLibcHSearchTest, NotFound) {
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0);
+ ASSERT_THAT(static_cast<void *>(
+ LIBC_NAMESPACE::hsearch({search_data2, nullptr}, FIND)),
+ Fails(ESRCH, static_cast<void *>(nullptr)));
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, ENTER)->key,
+ &search_data[i]);
+ }
+ ASSERT_THAT(static_cast<void *>(
+ LIBC_NAMESPACE::hsearch({search_data2, nullptr}, FIND)),
+ Fails(ESRCH, static_cast<void *>(nullptr)));
+ LIBC_NAMESPACE::hdestroy();
+}
+
+TEST(LlvmLibcHSearchTest, Found) {
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0);
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch(
+ {&search_data[i], reinterpret_cast<void *>(i)}, ENTER)
+ ->key,
+ &search_data[i]);
+ }
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data,
+ reinterpret_cast<void *>(i));
+ }
+ LIBC_NAMESPACE::hdestroy();
+}
+
+TEST(LlvmLibcHSearchTest, OnlyInsertWhenNotFound) {
+ using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Fails;
+ ASSERT_GT(LIBC_NAMESPACE::hcreate(GROUP_SIZE + 1), 0);
+ for (size_t i = 0; i < CAP / 7 * 5; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch(
+ {&search_data[i], reinterpret_cast<void *>(i)}, ENTER)
+ ->key,
+ &search_data[i]);
+ }
+ for (size_t i = 0; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch(
+ {&search_data[i], reinterpret_cast<void *>(1000 + i)}, ENTER)
+ ->key,
+ &search_data[i]);
+ }
+ for (size_t i = 0; i < CAP / 7 * 5; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data,
+ reinterpret_cast<void *>(i));
+ }
+ for (size_t i = CAP / 7 * 5; i < CAP; ++i) {
+ ASSERT_EQ(LIBC_NAMESPACE::hsearch({&search_data[i], nullptr}, FIND)->data,
+ reinterpret_cast<void *>(1000 + i));
+ }
+ LIBC_NAMESPACE::hdestroy();
+}
diff --git a/libc/utils/smhasher/smhasher.patch b/libc/utils/smhasher/smhasher.patch
new file mode 100644
index 000000000000000..8e3c918cf7aedda
--- /dev/null
+++ b/libc/utils/smhasher/smhasher.patch
@@ -0,0 +1,205 @@
+diff --git a/Hashes.h b/Hashes.h
+index 0071a6b..59bd2c4 100644
+--- a/Hashes.h
++++ b/Hashes.h
+@@ -1374,3 +1374,173 @@ void khashv64_test ( const void *key, int len, uint32_t seed, void *out);
+ extern PolymurHashParams g_polymurhashparams;
+ void polymur_seed_init (size_t &seed);
+ void polymur_test ( const void *key, int len, uint32_t seed, void *out);
++
++#include <cstddef>
++#include <cstdint>
++namespace llvm_libc_hash {
++ //===-- Portable string hash function ---------------------------*- C++ -*-===//
++//
++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
++// See https://llvm.org/LICENSE.txt for license information.
++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
++//
++//===----------------------------------------------------------------------===//
++
++
++#define LIBC_INLINE inline
++#define LIBC_INLINE_VAR inline
++using UInt128 = __uint128_t;
++
++// Folded multiplication.
++// This function multiplies two 64-bit integers and XORs the high and
++// low 64-bit halves of the 128-bit product.
++LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
++ UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
++ UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
++ uint64_t low = static_cast<uint64_t>(p & mask);
++ uint64_t high = static_cast<uint64_t>(p >> 64);
++ return low ^ high;
++}
++
++// Read as little endian.
++// A shift-and-or implementation does not generate satisfactory code on aarch64.
++// Therefore, we use a union to read the value.
++template <typename T>
++LIBC_INLINE static T read_little_endian(const void *ptr) {
++ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
++ union {
++ T value;
++ uint8_t buffer[sizeof(T)];
++ } data;
++ #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
++ for (size_t i = 0; i < sizeof(T); ++i) {
++ data.buffer[i] = bytes[sizeof(T) - i - 1];
++ }
++ #else
++ for (size_t i = 0; i < sizeof(T); ++i) {
++ data.buffer[i] = bytes[i];
++ }
++ #endif
++ return data.value;
++}
++
++
++// Specialized read functions for small values. size must be <= 8.
++LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
++ uint64_t &low, uint64_t &high) {
++ const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
++ if (size >= 2) {
++ if (size >= 4) {
++ low = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[0]));
++ high = static_cast<uint64_t>(read_little_endian<uint32_t>(&bytes[size - 4]));
++ } else {
++ low = static_cast<uint64_t>(read_little_endian<uint16_t>(&bytes[0]));
++ high = static_cast<uint64_t>(bytes[size - 1]);
++ }
++ } else {
++ if (size > 0) {
++ low = static_cast<uint64_t>(bytes[0]);
++ high = static_cast<uint64_t>(bytes[0]);
++ } else {
++ low = 0;
++ high = 0;
++ }
++ }
++}
++
++// This constant comes from Knuth's PRNG (it empirically works well).
++LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
++// Rotation amount for mixing.
++LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
++
++// Randomly generated values (for now, it uses the same values from aHash).
++LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
++ 0x243f6a8885a308d3,
++ 0x13198a2e03707344,
++ 0xa4093822299f31d0,
++ 0x082efa98ec4e6c89,
++ 0x452821e638d01377,
++ 0xbe5466cf34e90c6c,
++ 0xc0ac29b7c97c50dd,
++ 0x3f84d5b5b5470917,
++};
++
++// Rotate x left by y bits. Both shift counts are reduced modulo 64 so that y == 0 never shifts by 64 (UB).
++LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
++ return (x << (y & 63)) | (x >> ((64 - y) & 63));
++}
++
++// This is a portable string hasher. It is not cryptographically secure.
++// The implementation is extracted from the generic routine of aHash.
++class HashState {
++ uint64_t buffer;
++ uint64_t pad;
++ uint64_t extra_keys[2];
++ LIBC_INLINE void update(uint64_t low, uint64_t high) {
++ uint64_t combined =
++ folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
++ buffer = (buffer + pad) ^ combined;
++ buffer = rotate_left(buffer, ROTATE);
++ }
++
++ LIBC_INLINE HashState(
++ uint64_t a, uint64_t b, uint64_t c, uint64_t d
++ ) : buffer(a), pad(b), extra_keys{c, d} {
++ }
++
++ LIBC_INLINE static uint64_t mix(uint64_t seed) {
++ HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
++ RANDOMNESS[0][3]};
++ mixer.update(seed, 0);
++ return mixer.finish();
++ }
++
++public:
++ LIBC_INLINE HashState(uint64_t seed) {
++ // Mix one more round of the seed before deriving the initial state.
++ uint64_t mixed = mix(seed);
++ buffer = RANDOMNESS[1][0] ^ mixed;
++ pad = RANDOMNESS[1][1] ^ mixed;
++ extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
++ extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
++ }
++ LIBC_INLINE void update(const void *ptr, size_t size) {
++ uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
++ buffer = (buffer + size) * MULTIPLE;
++ uint64_t low, high;
++ if (size > 8) {
++ if (size > 16) {
++ // update tail
++ low = read_little_endian<uint64_t>(&bytes[size - 16]);
++ high = read_little_endian<uint64_t>(&bytes[size - 8]);
++ update(low, high);
++ while (size > 16) {
++ low = read_little_endian<uint64_t>(&bytes[0]);
++ high = read_little_endian<uint64_t>(&bytes[8]);
++ update(low, high);
++ bytes += 16;
++ size -= 16;
++ }
++ } else {
++ low = read_little_endian<uint64_t>(&bytes[0]);
++ high = read_little_endian<uint64_t>(&bytes[size - 8]);
++ update(low, high);
++ }
++ } else {
++ read_small_values(ptr, size, low, high);
++ update(low, high);
++ }
++ }
++ LIBC_INLINE uint64_t finish() {
++ uint64_t rot = buffer & 63;
++ uint64_t folded = folded_multiply(buffer, pad);
++ return rotate_left(folded, rot);
++ }
++};
++}
++
++static inline void llvm_libc_hash64 ( const void *key, int len, uint32_t seed, void *out) {
++ llvm_libc_hash::HashState state (seed);
++ state.update (key, len);
++ *(uint64_t*)out = state.finish();
++}
+diff --git a/PMP_Multilinear.h b/PMP_Multilinear.h
+index 9d24e73..42fdc5d 100644
+--- a/PMP_Multilinear.h
++++ b/PMP_Multilinear.h
+@@ -1417,8 +1417,8 @@ class PMP_Multilinear_Hasher
+ lowProduct.HighPart = midProduct.LowPart;
+ uint32_t hiProduct = c_ctr.HighPart * prevConstTerm.HighPart + midProduct.HighPart;
+
+- constTerm.QuadPart += lowProduct.QuadPart;
+- ctr += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
++ constTerm.QuadPart += lowProduct.QuadPart;
++ ctr.QuadPart += hiProduct + ( constTerm.QuadPart < lowProduct.QuadPart );
+
+ /* for ( uint32_t i=0; i<PMPML_CHUNK_SIZE; i+=8 )
+ {
+diff --git a/main.cpp b/main.cpp
+index eaaa0bd..d2029f4 100644
+--- a/main.cpp
++++ b/main.cpp
+@@ -88,6 +88,7 @@ const char* quality_str[3] = { "SKIP", "POOR", "GOOD" };
+ // marked with !! are known bad seeds, which either hash to 0 or create collisions.
+ HashInfo g_hashes[] =
+ {
++ { llvm_libc_hash64, 64, 0x00000000, "llvm_libc_hash64", "llvm_libc_hash 64bit", GOOD, {} },
+ // first the bad hash funcs, failing tests:
+ { DoNothingHash, 32, 0x0, "donothing32", "Do-Nothing function (measure call overhead)", SKIP, {0UL} /* !! */ },
+ { DoNothingHash, 64, 0x0, "donothing64", "Do-Nothing function (measure call overhead)", SKIP, {0ULL} /* !! */ },
diff --git a/libc/utils/smhasher/smhasher.txt b/libc/utils/smhasher/smhasher.txt
new file mode 100644
index 000000000000000..fce9cdcfca3357d
--- /dev/null
+++ b/libc/utils/smhasher/smhasher.txt
@@ -0,0 +1 @@
+7db446a0b8d2e29cd648fb5bf4224db9aed30905
\ No newline at end of file
>From 488e5f18207726dfe95ed19ec17b704826c85798 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Sun, 26 Nov 2023 23:41:07 -0500
Subject: [PATCH 2/8] fix code style
---
libc/src/__support/HashTable/CMakeLists.txt | 2 +-
libc/src/__support/HashTable/bitmask.h | 24 +++++----
.../HashTable/generic/bitmask_impl.inc | 8 +--
libc/src/__support/HashTable/randomness.h | 8 +--
.../__support/HashTable/sse2/bitmask_impl.inc | 6 +--
libc/src/__support/HashTable/table.h | 52 +++++++++++--------
libc/src/__support/hash.h | 29 ++++++-----
libc/src/__support/memory_size.h | 18 +++----
libc/test/src/__support/CMakeLists.txt | 3 +-
libc/test/src/__support/hash_test.cpp | 23 ++++----
10 files changed, 93 insertions(+), 80 deletions(-)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
index ae2fb640141c18c..3ba11dad0f6634e 100644
--- a/libc/src/__support/HashTable/CMakeLists.txt
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -25,13 +25,13 @@ add_header_library(
libc.src.__support.memory_size
libc.src.__support.bit
libc.src.__support.CPP.type_traits
+ libc.src.__support.CPP.new
libc.src.__support.macros.attributes
libc.src.__support.macros.optimization
libc.src.__support.hash
libc.src.string.memset
libc.src.string.strcmp
libc.src.string.strlen
- libc.include.stdlib
libc.include.llvm-libc-types.ENTRY
)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
index 4f14c32e1ac25e7..4e34b156e4705eb 100644
--- a/libc/src/__support/HashTable/bitmask.h
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -32,18 +32,18 @@ namespace internal {
// =============================
template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
// A masked constant whose bits are all set.
- constexpr static inline T MASK = WORD_MASK;
+ LIBC_INLINE_VAR constexpr static T MASK = WORD_MASK;
// A stride in the bitmask may use multiple bits.
- constexpr static inline size_t STRIDE = WORD_STRIDE;
+ LIBC_INLINE_VAR constexpr static size_t STRIDE = WORD_STRIDE;
T word;
// Check if any bit is set inside the word.
- bool any_bit_set() const { return word != 0; }
+ LIBC_INLINE bool any_bit_set() const { return word != 0; }
// Count trailing zeros with respect to stride. (Assume the bitmask is none
// zero.)
- size_t lowest_set_bit_nonzero() const {
+ LIBC_INLINE size_t lowest_set_bit_nonzero() const {
return unsafe_ctz<T>(word) / WORD_STRIDE;
}
};
@@ -54,7 +54,7 @@ template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
// Use the bitmask as an iterator. Update the state and return current lowest
// set bit. To make the bitmask iterable, each stride must contain 0 or exact
// 1 set bit.
- void remove_lowest_bit() {
+ LIBC_INLINE void remove_lowest_bit() {
// Remove the last set bit inside the word:
// word = 011110100 (original value)
// word - 1 = 011110011 (invert all bits up to the last set bit)
@@ -64,17 +64,19 @@ template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
using value_type = size_t;
using iterator = BitMask;
using const_iterator = BitMask;
- size_t operator*() const { return this->lowest_set_bit_nonzero(); }
- IteratableBitMaskAdaptor &operator++() {
+ LIBC_INLINE size_t operator*() const {
+ return this->lowest_set_bit_nonzero();
+ }
+ LIBC_INLINE IteratableBitMaskAdaptor &operator++() {
this->remove_lowest_bit();
return *this;
}
- IteratableBitMaskAdaptor begin() { return *this; }
- IteratableBitMaskAdaptor end() { return {0}; }
- bool operator==(const IteratableBitMaskAdaptor &other) {
+ LIBC_INLINE IteratableBitMaskAdaptor begin() { return *this; }
+ LIBC_INLINE IteratableBitMaskAdaptor end() { return {0}; }
+ LIBC_INLINE bool operator==(const IteratableBitMaskAdaptor &other) {
return this->word == other.word;
}
- bool operator!=(const IteratableBitMaskAdaptor &other) {
+ LIBC_INLINE bool operator!=(const IteratableBitMaskAdaptor &other) {
return this->word != other.word;
}
};
diff --git a/libc/src/__support/HashTable/generic/bitmask_impl.inc b/libc/src/__support/HashTable/generic/bitmask_impl.inc
index 24268d963f84b84..13e08382adf6221 100644
--- a/libc/src/__support/HashTable/generic/bitmask_impl.inc
+++ b/libc/src/__support/HashTable/generic/bitmask_impl.inc
@@ -16,7 +16,7 @@ namespace internal {
// byte | (byte << 8) = 0x000000000000ffff
// byte | (byte << 16) = 0x00000000ffffffff
// byte | (byte << 32) = 0xffffffffffffffff
-constexpr static inline uintptr_t repeat_byte(uintptr_t byte) {
+LIBC_INLINE constexpr uintptr_t repeat_byte(uintptr_t byte) {
size_t shift_amount = 8;
while (shift_amount < sizeof(uintptr_t) * 8) {
byte |= byte << shift_amount;
@@ -32,7 +32,7 @@ struct Group {
uintptr_t data;
// Load a group of control words from an arbitary address.
- static Group load(const void *__restrict addr) {
+ LIBC_INLINE static Group load(const void *__restrict addr) {
union {
uintptr_t value;
char bytes[sizeof(uintptr_t)];
@@ -44,7 +44,7 @@ struct Group {
// Find out the lanes equal to the given byte and return the bitmask
// with corresponding bits set.
- IteratableBitMask match_byte(uint8_t byte) const {
+ LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
// Given byte = 0x10, suppose the data is:
//
// data = [ 0x10 | 0x10 | 0x00 | 0xF1 | ... ]
@@ -94,7 +94,7 @@ struct Group {
// Find out the lanes equal to EMPTY or DELETE (highest bit set) and
// return the bitmask with corresponding bits set.
- BitMask mask_available() const {
+ LIBC_INLINE BitMask mask_available() const {
return {LIBC_NAMESPACE::Endian::to_little_endian(data) & repeat_byte(0x80)};
}
};
diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h
index d0336f0957572ec..0b88a3928f7d20a 100644
--- a/libc/src/__support/HashTable/randomness.h
+++ b/libc/src/__support/HashTable/randomness.h
@@ -20,12 +20,12 @@
namespace LIBC_NAMESPACE {
namespace internal {
namespace randomness {
-LIBC_INLINE_VAR thread_local static HashState state = {
+LIBC_INLINE_VAR thread_local HashState state = {
0x38049a7ea6f5a79b, 0x45cb02147c3f718a, 0x53eb431c12770718,
0x5b55742bd20a2fcb};
-LIBC_INLINE_VAR thread_local static uint64_t counter = 0;
-LIBC_INLINE_VAR constexpr static uint64_t RESEED_PERIOD = 1024;
-static LIBC_INLINE uint64_t next_random_seed() {
+LIBC_INLINE_VAR thread_local uint64_t counter = 0;
+LIBC_INLINE_VAR constexpr uint64_t RESEED_PERIOD = 1024;
+LIBC_INLINE uint64_t next_random_seed() {
if (counter % RESEED_PERIOD == 0) {
uint64_t entropy[2];
entropy[0] = reinterpret_cast<uint64_t>(&entropy);
diff --git a/libc/src/__support/HashTable/sse2/bitmask_impl.inc b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
index 00d5640303dc6cb..abdf167efcef798 100644
--- a/libc/src/__support/HashTable/sse2/bitmask_impl.inc
+++ b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
@@ -19,19 +19,19 @@ struct Group {
__m128i data;
// Load a group of control words from an arbitary address.
- static Group load(const void *__restrict addr) {
+ LIBC_INLINE static Group load(const void *__restrict addr) {
return {_mm_loadu_si128(static_cast<const __m128i *>(addr))};
}
// Find out the lanes equal to the given byte and return the bitmask
// with corresponding bits set.
- IteratableBitMask match_byte(uint8_t byte) const {
+ LIBC_INLINE IteratableBitMask match_byte(uint8_t byte) const {
auto cmp = _mm_cmpeq_epi8(data, _mm_set1_epi8(byte));
auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(cmp));
return {bitmask};
}
- BitMask mask_available() const {
+ LIBC_INLINE BitMask mask_available() const {
auto bitmask = static_cast<uint16_t>(_mm_movemask_epi8(data));
return {bitmask};
}
diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h
index 4ec2ca81ee1aaed..16e828c1d2674db 100644
--- a/libc/src/__support/HashTable/table.h
+++ b/libc/src/__support/HashTable/table.h
@@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_table_H
#include "include/llvm-libc-types/ENTRY.h"
+#include "src/__support/CPP/new.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/HashTable/bitmask.h"
#include "src/__support/bit.h"
@@ -22,12 +23,11 @@
#include "src/string/strlen.h"
#include <stddef.h>
#include <stdint.h>
-#include <stdlib.h>
namespace LIBC_NAMESPACE {
namespace internal {
-static LIBC_INLINE uint8_t secondary_hash(uint64_t hash) {
+LIBC_INLINE uint8_t secondary_hash(uint64_t hash) {
// top 7 bits of the hash.
return static_cast<uint8_t>((hash >> 57) & 0x7f);
}
@@ -50,7 +50,7 @@ struct ProbeSequence {
size_t stride;
size_t entries_mask;
- size_t next() {
+ LIBC_INLINE size_t next() {
position += stride;
position &= entries_mask;
stride += sizeof(Group);
@@ -62,7 +62,7 @@ struct ProbeSequence {
// need to do the fixup when we set the control bytes.
// The number of entries is at least 8: we don't have to worry
// about special sizes when check the fullness of the table.
-static LIBC_INLINE size_t capacity_to_entries(size_t cap) {
+LIBC_INLINE size_t capacity_to_entries(size_t cap) {
if (8 >= sizeof(Group) && cap < 8)
return 8;
if (16 >= sizeof(Group) && cap < 15)
@@ -92,46 +92,50 @@ struct HashTable {
size_t available_slots; // less than capacity
private:
// How many entries are there in the table.
- size_t num_of_entries() const { return entries_mask + 1; }
+ LIBC_INLINE size_t num_of_entries() const { return entries_mask + 1; }
- bool is_full() const { return available_slots == 0; }
+ LIBC_INLINE bool is_full() const { return available_slots == 0; }
- size_t offset_from_entries() const {
+ LIBC_INLINE size_t offset_from_entries() const {
size_t entries_size = num_of_entries() * sizeof(ENTRY);
- return entries_size + offset_to(entries_size, alignof(HashTable));
+ return entries_size + offset_to(entries_size, table_alignment());
}
- constexpr static size_t table_alignment() {
+ LIBC_INLINE constexpr static size_t table_alignment() {
return alignof(HashTable) > alignof(ENTRY) ? alignof(HashTable)
: alignof(ENTRY);
}
- constexpr static size_t offset_to_groups() { return sizeof(HashTable); }
+ LIBC_INLINE constexpr static size_t offset_to_groups() {
+ return sizeof(HashTable);
+ }
- ENTRY &entry(size_t i) { return reinterpret_cast<ENTRY *>(this)[-i - 1]; }
+ LIBC_INLINE ENTRY &entry(size_t i) {
+ return reinterpret_cast<ENTRY *>(this)[-i - 1];
+ }
- uint8_t &control(size_t i) {
+ LIBC_INLINE uint8_t &control(size_t i) {
uint8_t *ptr = reinterpret_cast<uint8_t *>(this) + offset_to_groups();
return ptr[i];
}
// We duplicate a group of control bytes to the end. Thus, it is possible that
// we need to set two control bytes at the same time.
- void set_ctrl(size_t index, uint8_t value) {
+ LIBC_INLINE void set_ctrl(size_t index, uint8_t value) {
size_t index2 = ((index - sizeof(Group)) & entries_mask) + sizeof(Group);
control(index) = value;
control(index2) = value;
}
public:
- static void deallocate(HashTable *table) {
+ LIBC_INLINE static void deallocate(HashTable *table) {
if (table) {
void *ptr =
reinterpret_cast<uint8_t *>(table) - table->offset_from_entries();
- free(ptr);
+ operator delete(ptr, std::align_val_t{table_alignment()});
}
}
- static HashTable *allocate(size_t capacity, uint64_t randomness) {
+ LIBC_INLINE static HashTable *allocate(size_t capacity, uint64_t randomness) {
// check if capacity_to_entries overflows MAX_MEM_SIZE
if (capacity > size_t{1} << (8 * sizeof(size_t) - 1 - 3))
return nullptr;
@@ -144,12 +148,14 @@ struct HashTable {
(align_boundary + header_size + ctrl_sizes).align_up(table_alignment());
if (!total_size.valid())
return nullptr;
- void *mem =
- aligned_alloc(table_alignment(), static_cast<size_t>(total_size));
+ AllocChecker ac;
+
+ void *mem = operator new(total_size, std::align_val_t{table_alignment()},
+ ac);
HashTable *table = reinterpret_cast<HashTable *>(
static_cast<uint8_t *>(mem) + align_boundary);
- if (mem) {
+ if (ac) {
table->entries_mask = entries - 1u;
table->available_slots = entries / 8 * 7;
table->state = HashState{randomness};
@@ -160,7 +166,7 @@ struct HashTable {
}
private:
- size_t find(const char *key, uint64_t primary) {
+ LIBC_INLINE size_t find(const char *key, uint64_t primary) {
uint8_t secondary = secondary_hash(primary);
ProbeSequence sequence{static_cast<size_t>(primary), 0, entries_mask};
while (true) {
@@ -187,7 +193,7 @@ struct HashTable {
}
private:
- ENTRY *insert(ENTRY item, uint64_t primary) {
+ LIBC_INLINE ENTRY *insert(ENTRY item, uint64_t primary) {
auto index = find(item.key, primary);
auto slot = &this->entry(index);
// SVr4 and POSIX.1-2001 specify that action is significant only for
@@ -207,7 +213,7 @@ struct HashTable {
}
public:
- ENTRY *find(const char *key) {
+ LIBC_INLINE ENTRY *find(const char *key) {
LIBC_NAMESPACE::internal::HashState hasher = state;
hasher.update(key, strlen(key));
uint64_t primary = hasher.finish();
@@ -216,7 +222,7 @@ struct HashTable {
return nullptr;
return &entry;
}
- ENTRY *insert(ENTRY item) {
+ LIBC_INLINE ENTRY *insert(ENTRY item) {
LIBC_NAMESPACE::internal::HashState hasher = state;
hasher.update(item.key, strlen(item.key));
uint64_t primary = hasher.finish();
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h
index 982e437116f348c..04d9060cc7728fc 100644
--- a/libc/src/__support/hash.h
+++ b/libc/src/__support/hash.h
@@ -19,7 +19,7 @@ namespace internal {
// Folded multiplication.
// This function multiplies two 64-bit integers and xor the high and
// low 64-bit parts of the result.
-LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
+LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
uint64_t low = static_cast<uint64_t>(p & mask);
@@ -30,7 +30,7 @@ LIBC_INLINE static uint64_t folded_multiply(uint64_t x, uint64_t y) {
// Read as little endian.
// Shift-and-or implementation does not give a satisfactory code on aarch64.
// Therefore, we use a union to read the value.
-template <typename T> LIBC_INLINE static T read_little_endian(const void *ptr) {
+template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
union {
T value;
@@ -50,8 +50,8 @@ template <typename T> LIBC_INLINE static T read_little_endian(const void *ptr) {
}
// Specialized read functions for small values. size must be <= 8.
-LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
- uint64_t &low, uint64_t &high) {
+LIBC_INLINE void read_small_values(const void *ptr, size_t size, uint64_t &low,
+ uint64_t &high) {
const uint8_t *bytes = static_cast<const uint8_t *>(ptr);
if (size >= 2) {
if (size >= 4) {
@@ -74,19 +74,19 @@ LIBC_INLINE static void read_small_values(const void *ptr, size_t size,
}
// This constant comes from Kunth's prng (it empirically works well).
-LIBC_INLINE_VAR static constexpr uint64_t MULTIPLE = 6364136223846793005;
+LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
// Rotation amount for mixing.
-LIBC_INLINE_VAR static constexpr uint64_t ROTATE = 23;
+LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
// Randomly generated values (for now, it uses the same values as in aHash).
-LIBC_INLINE_VAR static constexpr uint64_t RANDOMNESS[2][4] = {
+LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
{0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
0x082efa98ec4e6c89},
{0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
0x3f84d5b5b5470917},
};
-LIBC_INLINE static uint64_t rotate_left(uint64_t x, uint64_t y) {
+LIBC_INLINE uint64_t rotate_left(uint64_t x, uint64_t y) {
return (x << y) | (x >> (64 - y));
}
@@ -97,13 +97,13 @@ class HashState {
uint64_t buffer;
uint64_t pad;
uint64_t extra_keys[2];
- void update(uint64_t low, uint64_t high) {
+ LIBC_INLINE void update(uint64_t low, uint64_t high) {
uint64_t combined =
folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
buffer = (buffer + pad) ^ combined;
buffer = rotate_left(buffer, ROTATE);
}
- static uint64_t mix(uint64_t seed) {
+ LIBC_INLINE static uint64_t mix(uint64_t seed) {
HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
RANDOMNESS[0][3]};
mixer.update(seed, 0);
@@ -111,9 +111,10 @@ class HashState {
}
public:
- constexpr HashState(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
+ LIBC_INLINE constexpr HashState(uint64_t a, uint64_t b, uint64_t c,
+ uint64_t d)
: buffer(a), pad(b), extra_keys{c, d} {}
- HashState(uint64_t seed) {
+ LIBC_INLINE HashState(uint64_t seed) {
// Mix one more round of the seed to make it stronger.
uint64_t mixed = mix(seed);
buffer = RANDOMNESS[1][0] ^ mixed;
@@ -121,7 +122,7 @@ class HashState {
extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
}
- void update(const void *ptr, size_t size) {
+ LIBC_INLINE void update(const void *ptr, size_t size) {
uint8_t const *bytes = static_cast<const uint8_t *>(ptr);
buffer = (buffer + size) * MULTIPLE;
uint64_t low, high;
@@ -148,7 +149,7 @@ class HashState {
update(low, high);
}
}
- uint64_t finish() {
+ LIBC_INLINE uint64_t finish() {
uint64_t rot = buffer & 63;
uint64_t folded = folded_multiply(buffer, pad);
return rotate_left(folded, rot);
diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h
index 7206a50c19d7228..d8438048229aebd 100644
--- a/libc/src/__support/memory_size.h
+++ b/libc/src/__support/memory_size.h
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {
namespace internal {
-template <class T> LIBC_INLINE static bool mul_overflow(T a, T b, T *res) {
+template <class T> LIBC_INLINE bool mul_overflow(T a, T b, T *res) {
#if defined(__has_builtin) && __has_builtin(__builtin_mul_overflow)
return __builtin_mul_overflow(a, b, res);
#else
@@ -32,23 +32,23 @@ class SafeMemSize {
private:
using type = cpp::make_signed_t<size_t>;
type value;
- explicit SafeMemSize(type value) : value(value) {}
+ LIBC_INLINE explicit SafeMemSize(type value) : value(value) {}
public:
- static constexpr size_t MAX_MEM_SIZE =
+ LIBC_INLINE_VAR static constexpr size_t MAX_MEM_SIZE =
static_cast<size_t>(cpp::numeric_limits<type>::max());
- explicit SafeMemSize(size_t value)
+ LIBC_INLINE explicit SafeMemSize(size_t value)
: value(value <= MAX_MEM_SIZE ? static_cast<type>(value) : -1) {}
- operator size_t() { return static_cast<size_t>(value); }
- bool valid() { return value >= 0; }
- SafeMemSize operator+(const SafeMemSize &other) {
+ LIBC_INLINE operator size_t() { return static_cast<size_t>(value); }
+ LIBC_INLINE bool valid() { return value >= 0; }
+ LIBC_INLINE SafeMemSize operator+(const SafeMemSize &other) {
type result;
if (LIBC_UNLIKELY((value | other.value) < 0))
result = -1;
result = value + other.value;
return SafeMemSize{result};
}
- SafeMemSize operator*(const SafeMemSize &other) {
+ LIBC_INLINE SafeMemSize operator*(const SafeMemSize &other) {
type result;
if (LIBC_UNLIKELY((value | other.value) < 0))
result = -1;
@@ -56,7 +56,7 @@ class SafeMemSize {
result = -1;
return SafeMemSize{result};
}
- SafeMemSize align_up(size_t alignment) {
+ LIBC_INLINE SafeMemSize align_up(size_t alignment) {
if (!is_power_of_two(alignment) || alignment > MAX_MEM_SIZE || !valid())
return SafeMemSize{type{-1}};
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 8b323adfedb21b0..091f202af92b0ab 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -134,7 +134,8 @@ add_libc_test(
hash_test.cpp
DEPENDS
libc.src.__support.hash
- libc.include.stdlib
+ libc.src.__support.CPP.new
+ libc.src.stdlib.rand
UNIT_TEST_ONLY
# Aligned Allocation is not supported in hermetic builds.
)
diff --git a/libc/test/src/__support/hash_test.cpp b/libc/test/src/__support/hash_test.cpp
index 1b86d21a1fd4913..bc26ed0ce79753a 100644
--- a/libc/test/src/__support/hash_test.cpp
+++ b/libc/test/src/__support/hash_test.cpp
@@ -6,23 +6,26 @@
//
//===----------------------------------------------------------------------===//
+#include "src/__support/CPP/new.h"
#include "src/__support/hash.h"
+#include "src/stdlib/rand.h"
#include "test/UnitTest/Test.h"
-#include <cstdint>
#include <stddef.h>
-#include <stdlib.h>
#include <string.h>
template <class T> struct AlignedMemory {
T *data;
size_t offset;
- AlignedMemory(size_t size, size_t alignment, size_t offset) : offset(offset) {
+ std::align_val_t alignment;
+ AlignedMemory(size_t size, size_t alignment, size_t offset)
+ : offset(offset), alignment{alignment} {
size_t sz = size * sizeof(T);
size_t aligned = sz + ((-sz) & (alignment - 1)) + alignment;
- data = static_cast<T *>(::aligned_alloc(alignment, aligned));
+ LIBC_NAMESPACE::AllocChecker ac;
+ data = static_cast<T *>(operator new(aligned, this->alignment, ac));
data += offset % alignment;
}
- ~AlignedMemory() { ::free(data - offset); }
+ ~AlignedMemory() { operator delete(data - offset, alignment); }
};
size_t sizes[] = {0, 1, 23, 59, 1024, 5261};
@@ -35,14 +38,14 @@ TEST(LlvmLibcHashTest, SanityCheck) {
uint64_t hash;
{
AlignedMemory<char> mem(sz, 64, 0);
- ::memset(mem.data, val, sz);
+ memset(mem.data, val, sz);
LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
state.update(mem.data, sz);
hash = state.finish();
}
for (size_t offset = 1; offset < 64; ++offset) {
AlignedMemory<char> mem(sz, 64, offset);
- ::memset(mem.data, val, sz);
+ memset(mem.data, val, sz);
LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
state.update(mem.data, sz);
ASSERT_EQ(hash, state.finish());
@@ -67,7 +70,7 @@ TEST(LlvmLibcHashTest, Avalanche) {
for (uint8_t val : values) {
uint64_t hash;
AlignedMemory<char> mem(sz, 64, 0);
- ::memset(mem.data, val, sz);
+ memset(mem.data, val, sz);
{
LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
state.update(mem.data, sz);
@@ -96,7 +99,7 @@ TEST(LlvmLibcHashTest, UniformLSB) {
srand(0);
for (size_t sz : sizes) {
AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
- ::memset(counters.data, 0, sz * sizeof(size_t));
+ memset(counters.data, 0, sz * sizeof(size_t));
for (size_t i = 0; i < 200 * sz; ++i) {
int randomness[8] = {rand(), rand(), rand(), rand(),
rand(), rand(), rand(), rand()};
@@ -121,7 +124,7 @@ TEST(LlvmLibcHashTest, UniformLSB) {
TEST(LlvmLibcHashTest, UniformMSB) {
size_t sz = 1 << 7;
AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
- ::memset(counters.data, 0, sz * sizeof(size_t));
+ memset(counters.data, 0, sz * sizeof(size_t));
for (size_t i = 0; i < 200 * sz; ++i) {
LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8};
state.update(&i, sizeof(i));
>From 2e98b1628570c3469b0dab46ed2c76c334a9137c Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 27 Nov 2023 00:17:39 -0500
Subject: [PATCH 3/8] fix: calling functions in wrong namespaces
---
libc/test/src/__support/CMakeLists.txt | 2 ++
.../src/__support/HashTable/CMakeLists.txt | 7 +++---
.../src/__support/HashTable/group_test.cpp | 2 +-
libc/test/src/__support/hash_test.cpp | 22 ++++++++++---------
libc/test/src/__support/memory_size_test.cpp | 1 -
5 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 091f202af92b0ab..2b9fa93bb548e53 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -136,6 +136,8 @@ add_libc_test(
libc.src.__support.hash
libc.src.__support.CPP.new
libc.src.stdlib.rand
+ libc.src.stdlib.srand
+ libc.src.string.memset
UNIT_TEST_ONLY
# Aligned Allocation is not supported in hermetic builds.
)
diff --git a/libc/test/src/__support/HashTable/CMakeLists.txt b/libc/test/src/__support/HashTable/CMakeLists.txt
index d1364f35840c2be..263f63e453b0eff 100644
--- a/libc/test/src/__support/HashTable/CMakeLists.txt
+++ b/libc/test/src/__support/HashTable/CMakeLists.txt
@@ -6,7 +6,6 @@ add_libc_test(
bitmask_test.cpp
DEPENDS
libc.src.__support.HashTable.bitmask
- libc.include.stdlib
)
add_libc_test(
table_test
@@ -30,7 +29,7 @@ if (CPU_SUPPORTS_SSE2)
group_test.cpp
DEPENDS
libc.src.__support.HashTable.bitmask
- libc.include.stdlib
+ libc.src.stdlib.rand
libc.src.string.memcmp
COMPILE_OPTIONS
${LIBC_COMPILE_OPTIONS_NATIVE}
@@ -44,7 +43,7 @@ if (CPU_SUPPORTS_SSE2)
group_test.cpp
DEPENDS
libc.src.__support.HashTable.bitmask
- libc.include.stdlib
+ libc.src.stdlib.rand
libc.src.string.memcmp
COMPILE_OPTIONS
-mno-sse2 -mtune=generic
@@ -59,7 +58,7 @@ else()
group_test.cpp
DEPENDS
libc.src.__support.HashTable.bitmask
- libc.include.stdlib
+ libc.src.stdlib.rand
libc.src.string.memcmp
UNIT_TEST_ONLY
)
diff --git a/libc/test/src/__support/HashTable/group_test.cpp b/libc/test/src/__support/HashTable/group_test.cpp
index 810fff093b621ac..3926015d2184ec7 100644
--- a/libc/test/src/__support/HashTable/group_test.cpp
+++ b/libc/test/src/__support/HashTable/group_test.cpp
@@ -8,10 +8,10 @@
#include "src/__support/HashTable/bitmask.h"
+#include "src/stdlib/rand.h"
#include "src/string/memcmp.h"
#include "test/UnitTest/Test.h"
#include <stdint.h>
-#include <stdlib.h>
namespace LIBC_NAMESPACE {
namespace internal {
diff --git a/libc/test/src/__support/hash_test.cpp b/libc/test/src/__support/hash_test.cpp
index bc26ed0ce79753a..612efd544c66f9f 100644
--- a/libc/test/src/__support/hash_test.cpp
+++ b/libc/test/src/__support/hash_test.cpp
@@ -9,9 +9,9 @@
#include "src/__support/CPP/new.h"
#include "src/__support/hash.h"
#include "src/stdlib/rand.h"
+#include "src/stdlib/srand.h"
+#include "src/string/memset.h"
#include "test/UnitTest/Test.h"
-#include <stddef.h>
-#include <string.h>
template <class T> struct AlignedMemory {
T *data;
@@ -38,14 +38,14 @@ TEST(LlvmLibcHashTest, SanityCheck) {
uint64_t hash;
{
AlignedMemory<char> mem(sz, 64, 0);
- memset(mem.data, val, sz);
+ LIBC_NAMESPACE::memset(mem.data, val, sz);
LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
state.update(mem.data, sz);
hash = state.finish();
}
for (size_t offset = 1; offset < 64; ++offset) {
AlignedMemory<char> mem(sz, 64, offset);
- memset(mem.data, val, sz);
+ LIBC_NAMESPACE::memset(mem.data, val, sz);
LIBC_NAMESPACE::internal::HashState state{0x1234567890abcdef};
state.update(mem.data, sz);
ASSERT_EQ(hash, state.finish());
@@ -70,7 +70,7 @@ TEST(LlvmLibcHashTest, Avalanche) {
for (uint8_t val : values) {
uint64_t hash;
AlignedMemory<char> mem(sz, 64, 0);
- memset(mem.data, val, sz);
+ LIBC_NAMESPACE::memset(mem.data, val, sz);
{
LIBC_NAMESPACE::internal::HashState state{0xabcdef1234567890};
state.update(mem.data, sz);
@@ -96,13 +96,15 @@ TEST(LlvmLibcHashTest, Avalanche) {
// Hash a random sequence of input. The LSB should be uniform enough such that
// values spread across the entire range.
TEST(LlvmLibcHashTest, UniformLSB) {
- srand(0);
+ LIBC_NAMESPACE::srand(0xffffffff);
for (size_t sz : sizes) {
AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
- memset(counters.data, 0, sz * sizeof(size_t));
+ LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t));
for (size_t i = 0; i < 200 * sz; ++i) {
- int randomness[8] = {rand(), rand(), rand(), rand(),
- rand(), rand(), rand(), rand()};
+ int randomness[8] = {LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
+ LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
+ LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand(),
+ LIBC_NAMESPACE::rand(), LIBC_NAMESPACE::rand()};
{
LIBC_NAMESPACE::internal::HashState state{0x1a2b3c4d5e6f7a8b};
state.update(randomness, sizeof(randomness));
@@ -124,7 +126,7 @@ TEST(LlvmLibcHashTest, UniformLSB) {
TEST(LlvmLibcHashTest, UniformMSB) {
size_t sz = 1 << 7;
AlignedMemory<size_t> counters(sz, sizeof(size_t), 0);
- memset(counters.data, 0, sz * sizeof(size_t));
+ LIBC_NAMESPACE::memset(counters.data, 0, sz * sizeof(size_t));
for (size_t i = 0; i < 200 * sz; ++i) {
LIBC_NAMESPACE::internal::HashState state{0xa1b2c3d4e5f6a7b8};
state.update(&i, sizeof(i));
diff --git a/libc/test/src/__support/memory_size_test.cpp b/libc/test/src/__support/memory_size_test.cpp
index 486436eff267acf..98b6a613e62fb42 100644
--- a/libc/test/src/__support/memory_size_test.cpp
+++ b/libc/test/src/__support/memory_size_test.cpp
@@ -8,7 +8,6 @@
#include "src/__support/memory_size.h"
#include "test/UnitTest/Test.h"
-#include <cstddef>
namespace LIBC_NAMESPACE {
namespace internal {
>From ff981ed7eaa389a8ecc7ed957e0d777e18b9f309 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 27 Nov 2023 01:26:03 -0500
Subject: [PATCH 4/8] fix: use libc macro definitions
---
libc/src/__support/CMakeLists.txt | 1 +
libc/src/__support/HashTable/CMakeLists.txt | 2 +-
libc/src/__support/HashTable/bitmask.h | 3 ++-
libc/src/__support/memory_size.h | 2 +-
4 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index cdd9d9dfe55a4d4..684c6ee8f9a545b 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -248,6 +248,7 @@ add_header_library(
libc.src.__support.CPP.limits
libc.src.__support.macros.optimization
libc.src.__support.macros.attributes
+ libc.src.__support.macros.config
)
add_subdirectory(FPUtil)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
index 3ba11dad0f6634e..e62d4eb6b71d8f1 100644
--- a/libc/src/__support/HashTable/CMakeLists.txt
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -3,8 +3,8 @@ add_header_library(
HDRS
bitmask.h
DEPENDS
- libc.src.__support.common
libc.src.__support.bit
+ libc.src.__support.macros.properties.cpu_features
)
list(FIND TARGET_ENTRYPOINT_NAME_LIST getrandom getrandom_index)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
index 4e34b156e4705eb..e039393be645983 100644
--- a/libc/src/__support/HashTable/bitmask.h
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -10,6 +10,7 @@
#define LLVM_LIBC_SRC___SUPPORT_HASHTABLE_BITMASK_H
#include "src/__support/bit.h"
+#include "src/__support/macros/properties/cpu_features.h"
#include <stddef.h> // size_t
#include <stdint.h> // uint8_t, uint64_t
@@ -84,7 +85,7 @@ template <class BitMask> struct IteratableBitMaskAdaptor : public BitMask {
} // namespace internal
} // namespace LIBC_NAMESPACE
-#if defined(__SSE2__)
+#if defined(LIBC_TARGET_CPU_HAS_SSE2)
#include "sse2/bitmask_impl.inc"
#else
#include "generic/bitmask_impl.inc"
diff --git a/libc/src/__support/memory_size.h b/libc/src/__support/memory_size.h
index d8438048229aebd..df179a6604714b1 100644
--- a/libc/src/__support/memory_size.h
+++ b/libc/src/__support/memory_size.h
@@ -15,7 +15,7 @@
namespace LIBC_NAMESPACE {
namespace internal {
template <class T> LIBC_INLINE bool mul_overflow(T a, T b, T *res) {
-#if defined(__has_builtin) && __has_builtin(__builtin_mul_overflow)
+#if LIBC_HAS_BUILTIN(__builtin_mul_overflow)
return __builtin_mul_overflow(a, b, res);
#else
T max = cpp::numeric_limits<T>::max();
>From 4573447b22cbbd2242074be9ce03222f074d923c Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 27 Nov 2023 19:08:42 -0500
Subject: [PATCH 5/8] address code reviews.
Some more changes include:
* Move `rotate_left` to `__support/bit.h` as it can be made into a more general function.
* Add more tests for `rotate_left` and `offset_to`.
---
libc/src/__support/CMakeLists.txt | 4 ++-
libc/src/__support/HashTable/randomness.h | 3 +++
libc/src/__support/HashTable/table.h | 6 ++---
libc/src/__support/bit.h | 11 ++++++++
libc/src/__support/hash.h | 19 ++++++++------
libc/src/search/hdestroy.cpp | 5 ++--
libc/src/search/hdestroy_r.cpp | 6 ++---
libc/test/src/__support/bit_test.cpp | 32 +++++++++++++++++++++++
8 files changed, 68 insertions(+), 18 deletions(-)
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 684c6ee8f9a545b..a76b22960f5a504 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -32,6 +32,7 @@ add_header_library(
bit.h
DEPENDS
libc.src.__support.macros.attributes
+ libc.src.__support.CPP.type_traits
)
add_header_library(
@@ -235,8 +236,9 @@ add_header_library(
HDRS
hash.h
DEPENDS
- .common
+ .bit
.uint128
+ libc.src.__support.macros.attributes
)
add_header_library(
diff --git a/libc/src/__support/HashTable/randomness.h b/libc/src/__support/HashTable/randomness.h
index 0b88a3928f7d20a..bcc91190e9d4522 100644
--- a/libc/src/__support/HashTable/randomness.h
+++ b/libc/src/__support/HashTable/randomness.h
@@ -20,6 +20,9 @@
namespace LIBC_NAMESPACE {
namespace internal {
namespace randomness {
+// We need an initial state for the hash function. More entropy is to be added
+// at the first use and each round of reseeding. The following random numbers
+// are generated from https://www.random.org/cgi-bin/randbyte?nbytes=64&format=h
LIBC_INLINE_VAR thread_local HashState state = {
0x38049a7ea6f5a79b, 0x45cb02147c3f718a, 0x53eb431c12770718,
0x5b55742bd20a2fcb};
diff --git a/libc/src/__support/HashTable/table.h b/libc/src/__support/HashTable/table.h
index 16e828c1d2674db..ec0ec78869ad582 100644
--- a/libc/src/__support/HashTable/table.h
+++ b/libc/src/__support/HashTable/table.h
@@ -29,7 +29,7 @@ namespace internal {
LIBC_INLINE uint8_t secondary_hash(uint64_t hash) {
// top 7 bits of the hash.
- return static_cast<uint8_t>((hash >> 57) & 0x7f);
+ return static_cast<uint8_t>(hash >> 57);
}
// Probe sequence based on triangular numbers, which is guaranteed (since our
@@ -199,9 +199,9 @@ struct HashTable {
// SVr4 and POSIX.1-2001 specify that action is significant only for
// unsuccessful searches, so that an ENTER should not do anything
// for a successful search.
- if (slot->key != nullptr) {
+ if (slot->key != nullptr)
return slot;
- }
+
if (!is_full()) {
set_ctrl(index, secondary_hash(primary));
slot->key = item.key;
diff --git a/libc/src/__support/bit.h b/libc/src/__support/bit.h
index 5b97d9c38e0cd6e..ab2e07744a866f2 100644
--- a/libc/src/__support/bit.h
+++ b/libc/src/__support/bit.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_BIT_H
#define LLVM_LIBC_SRC___SUPPORT_BIT_H
+#include "src/__support/CPP/type_traits.h" // make_unsigned
#include "src/__support/macros/attributes.h" // LIBC_INLINE
namespace LIBC_NAMESPACE {
@@ -101,6 +102,16 @@ template <typename T> LIBC_INLINE constexpr T offset_to(T val, T align) {
return (-val) & (align - 1);
}
+template <typename T> LIBC_INLINE constexpr T rotate_left(T val, T amount) {
+ // Implementation taken from "Safe, Efficient, and Portable Rotate in C/C++"
+ // https://blog.regehr.org/archives/1063
+ // Using the safe version as the rotation pattern is now recognized by both
+ // GCC and Clang.
+ using U = cpp::make_unsigned_t<T>;
+ U v = static_cast<U>(val);
+ U a = static_cast<U>(amount);
+ return (v << a) | (v >> ((-a) & (sizeof(U) * 8 - 1)));
+}
} // namespace LIBC_NAMESPACE
#endif // LLVM_LIBC_SRC___SUPPORT_BIT_H
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h
index 04d9060cc7728fc..d3e706e4424eda0 100644
--- a/libc/src/__support/hash.h
+++ b/libc/src/__support/hash.h
@@ -9,9 +9,10 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_HASH_H
#define LLVM_LIBC_SRC___SUPPORT_HASH_H
-#include "src/__support/UInt128.h" // UInt128
-#include "src/__support/macros/attributes.h"
-#include <stdint.h> // For uint64_t
+#include "src/__support/UInt128.h" // UInt128
+#include "src/__support/bit.h" // rotate_left
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include <stdint.h> // For uint64_t
namespace LIBC_NAMESPACE {
namespace internal {
@@ -38,6 +39,10 @@ template <typename T> LIBC_INLINE T read_little_endian(const void *ptr) {
} data;
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
// Compiler should able to optimize this as a load followed by a byte swap.
+ // On aarch64 (-mbig-endian), this compiles to the following for int:
+ // ldr w0, [x0]
+ // rev w0, w0
+ // ret
for (size_t i = 0; i < sizeof(T); ++i) {
data.buffer[i] = bytes[sizeof(T) - i - 1];
}
@@ -78,7 +83,9 @@ LIBC_INLINE_VAR constexpr uint64_t MULTIPLE = 6364136223846793005;
// Rotation amount for mixing.
LIBC_INLINE_VAR constexpr uint64_t ROTATE = 23;
-// Randomly generated values (for now, it uses the same values as in aHash).
+// Randomly generated values. For now, we use the same values as in aHash as
+// they are widely tested.
+// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
{0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
0x082efa98ec4e6c89},
@@ -86,10 +93,6 @@ LIBC_INLINE_VAR constexpr uint64_t RANDOMNESS[2][4] = {
0x3f84d5b5b5470917},
};
-LIBC_INLINE uint64_t rotate_left(uint64_t x, uint64_t y) {
- return (x << y) | (x >> (64 - y));
-}
-
// This is a portable string hasher. It is not cryptographically secure.
// The quality of the hash is good enough to pass all tests in SMHasher.
// The implementation is derived from the generic routine of aHash.
diff --git a/libc/src/search/hdestroy.cpp b/libc/src/search/hdestroy.cpp
index 1981d371a9fb755..b3a15aab02b6449 100644
--- a/libc/src/search/hdestroy.cpp
+++ b/libc/src/search/hdestroy.cpp
@@ -12,9 +12,8 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(void, hdestroy, (void)) {
- using namespace internal;
- HashTable::deallocate(global_hash_table);
- global_hash_table = nullptr;
+ internal::HashTable::deallocate(internal::global_hash_table);
+ internal::global_hash_table = nullptr;
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/search/hdestroy_r.cpp b/libc/src/search/hdestroy_r.cpp
index f0dd872d874a77e..e2fda93931f7826 100644
--- a/libc/src/search/hdestroy_r.cpp
+++ b/libc/src/search/hdestroy_r.cpp
@@ -12,13 +12,13 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(void, hdestroy_r, (struct hsearch_data * htab)) {
- using namespace internal;
if (htab == nullptr) {
libc_errno = EINVAL;
return;
}
- HashTable *table = static_cast<HashTable *>(htab->__opaque);
- HashTable::deallocate(table);
+ internal::HashTable *table =
+ static_cast<internal::HashTable *>(htab->__opaque);
+ internal::HashTable::deallocate(table);
htab->__opaque = nullptr;
}
diff --git a/libc/test/src/__support/bit_test.cpp b/libc/test/src/__support/bit_test.cpp
index c25aef76e0266e1..e585735394e2c77 100644
--- a/libc/test/src/__support/bit_test.cpp
+++ b/libc/test/src/__support/bit_test.cpp
@@ -15,6 +15,38 @@ TEST(LlvmLibcBlockBitTest, TODO) {
// TODO Implement me.
}
+TEST(LlvmLibcBlockBitTest, OffsetTo) {
+ ASSERT_EQ(offset_to(0, 512), 0);
+ ASSERT_EQ(offset_to(1, 512), 511);
+ ASSERT_EQ(offset_to(2, 512), 510);
+ ASSERT_EQ(offset_to(13, 1), 0);
+ ASSERT_EQ(offset_to(13, 4), 3);
+ for (unsigned int i = 0; i < 31; ++i) {
+ ASSERT_EQ((offset_to(i, 1u << i) + i) % (1u << i), 0u);
+ }
+}
+
+TEST(LlvmLibcBlockBitTest, RotateLeft) {
+ {
+ unsigned current = 1;
+ for (unsigned i = 0; i < 8 * sizeof(unsigned); ++i) {
+ ASSERT_EQ(1u << i, current);
+ ASSERT_EQ(current, rotate_left(1u, i));
+ current = rotate_left(current, 1u);
+ }
+ ASSERT_EQ(current, 1u);
+ }
+ {
+ int current = 1;
+ for (int i = 0; i < 8 * static_cast<int>(sizeof(int)); ++i) {
+ ASSERT_EQ(1 << i, current);
+ ASSERT_EQ(current, rotate_left(1, i));
+ current = rotate_left(current, 1);
+ }
+ ASSERT_EQ(current, 1);
+ }
+}
+
TEST(LlvmLibcBlockBitTest, NextPowerOfTwo) {
ASSERT_EQ(1u, next_power_of_two(0u));
for (unsigned int i = 0; i < 31; ++i) {
>From ab8569023d11672dbbeff5ffe6e8d4bcb2131858 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Tue, 28 Nov 2023 11:37:35 -0500
Subject: [PATCH 6/8] address code reviews
---
libc/cmake/modules/LLVMLibCFlagRules.cmake | 7 +++++++
libc/cmake/modules/LLVMLibCObjectRules.cmake | 14 ++++++++++++++
libc/src/__support/HashTable/CMakeLists.txt | 6 ++++--
libc/src/__support/HashTable/bitmask.h | 4 ++--
libc/src/__support/HashTable/sse2/bitmask_impl.inc | 2 +-
libc/src/__support/hash.h | 3 +--
6 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/libc/cmake/modules/LLVMLibCFlagRules.cmake b/libc/cmake/modules/LLVMLibCFlagRules.cmake
index a1d3dc4b567aa3f..37ffe708fb7548c 100644
--- a/libc/cmake/modules/LLVMLibCFlagRules.cmake
+++ b/libc/cmake/modules/LLVMLibCFlagRules.cmake
@@ -132,6 +132,8 @@ endfunction(get_fq_dep_list_without_flag)
# Special flags
set(FMA_OPT_FLAG "FMA_OPT")
set(ROUND_OPT_FLAG "ROUND_OPT")
+# SSE2 is the baseline for x86_64, so we add a negative flag to disable it if needed.
+set(DISABLE_SSE2_OPT_FLAG "DISABLE_SSE2_OPT")
# Skip FMA_OPT flag for targets that don't support fma.
if(NOT((LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "FMA")) OR
@@ -143,3 +145,8 @@ endif()
if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE4_2")))
set(SKIP_FLAG_EXPANSION_ROUND_OPT TRUE)
endif()
+
+# Skip DISABLE_SSE2_OPT flag for targets that don't support SSE2.
+if(NOT(LIBC_TARGET_ARCHITECTURE_IS_X86 AND (LIBC_CPU_FEATURES MATCHES "SSE2")))
+ set(SKIP_FLAG_EXPANSION_DISABLE_SSE2_OPT TRUE)
+endif()
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index d5df27a2dcb4341..6d94ce97f0b689c 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -18,6 +18,14 @@ function(_get_common_compile_options output_var flags)
set(ADD_SSE4_2_FLAG TRUE)
endif()
+ list(FIND flags ${DISABLE_SSE2_OPT_FLAG} no_sse2)
+ if(${no_sse2} LESS 0)
+ list(FIND flags "${DISABLE_SSE2_OPT_FLAG}__ONLY" no_sse2)
+ endif()
+ if((${no_sse2} GREATER -1) AND (LIBC_CPU_FEATURES MATCHES "SSE2"))
+ set(DISABLE_SSE2_FLAG TRUE)
+ endif()
+
set(compile_options ${LIBC_COMPILE_OPTIONS_DEFAULT} ${ARGN})
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
list(APPEND compile_options "-fpie")
@@ -58,12 +66,18 @@ function(_get_common_compile_options output_var flags)
if(ADD_SSE4_2_FLAG)
list(APPEND compile_options "-msse4.2")
endif()
+ if(DISABLE_SSE2_FLAG)
+ list(APPEND compile_options "-mno-sse2")
+ endif()
elseif(MSVC)
list(APPEND compile_options "/EHs-c-")
list(APPEND compile_options "/GR-")
if(ADD_FMA_FLAG)
list(APPEND compile_options "/arch:AVX2")
endif()
+ if(DISABLE_SSE2_FLAG)
+ list(APPEND compile_options "/arch:SSE")
+ endif()
endif()
if (LIBC_TARGET_ARCHITECTURE_IS_GPU)
list(APPEND compile_options "-nogpulib")
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
index e62d4eb6b71d8f1..1128ca826851739 100644
--- a/libc/src/__support/HashTable/CMakeLists.txt
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -2,6 +2,8 @@ add_header_library(
bitmask
HDRS
bitmask.h
+ FLAGS
+ DISABLE_SSE2_OPT
DEPENDS
libc.src.__support.bit
libc.src.__support.macros.properties.cpu_features
@@ -39,10 +41,10 @@ add_header_library(
randomness
HDRS
randomness.h
+ COMPILE_OPTIONS
+ ${randomness_compile_flags}
DEPENDS
libc.src.__support.hash
libc.src.__support.common
${randomness_extra_depends}
- FLAGS
- ${randomness_compile_flags}
)
diff --git a/libc/src/__support/HashTable/bitmask.h b/libc/src/__support/HashTable/bitmask.h
index e039393be645983..761125feb951d9a 100644
--- a/libc/src/__support/HashTable/bitmask.h
+++ b/libc/src/__support/HashTable/bitmask.h
@@ -40,11 +40,11 @@ template <typename T, T WORD_MASK, size_t WORD_STRIDE> struct BitMaskAdaptor {
T word;
// Check if any bit is set inside the word.
- LIBC_INLINE bool any_bit_set() const { return word != 0; }
+ LIBC_INLINE constexpr bool any_bit_set() const { return word != 0; }
// Count trailing zeros with respect to stride. (Assume the bitmask is none
// zero.)
- LIBC_INLINE size_t lowest_set_bit_nonzero() const {
+ LIBC_INLINE constexpr size_t lowest_set_bit_nonzero() const {
return unsafe_ctz<T>(word) / WORD_STRIDE;
}
};
diff --git a/libc/src/__support/HashTable/sse2/bitmask_impl.inc b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
index abdf167efcef798..6308f2fed6661c2 100644
--- a/libc/src/__support/HashTable/sse2/bitmask_impl.inc
+++ b/libc/src/__support/HashTable/sse2/bitmask_impl.inc
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include <emmintrin.h>
+#include <immintrin.h>
namespace LIBC_NAMESPACE {
namespace internal {
// With SSE2, every bitmask is iteratable as
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h
index d3e706e4424eda0..ad12cf79e8d2cf4 100644
--- a/libc/src/__support/hash.h
+++ b/libc/src/__support/hash.h
@@ -21,9 +21,8 @@ namespace internal {
// This function multiplies two 64-bit integers and xor the high and
// low 64-bit parts of the result.
LIBC_INLINE uint64_t folded_multiply(uint64_t x, uint64_t y) {
- UInt128 mask = static_cast<UInt128>(0xffffffffffffffff);
UInt128 p = static_cast<UInt128>(x) * static_cast<UInt128>(y);
- uint64_t low = static_cast<uint64_t>(p & mask);
+ uint64_t low = static_cast<uint64_t>(p);
uint64_t high = static_cast<uint64_t>(p >> 64);
return low ^ high;
}
>From 58f4a21b61f5fe2d604a6dfb4a4f4051a97f7516 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Tue, 28 Nov 2023 13:01:54 -0500
Subject: [PATCH 7/8] remove extra tests
---
.../src/__support/HashTable/CMakeLists.txt | 57 ++++---------------
.../src/__support/HashTable/group_test.cpp | 1 -
2 files changed, 12 insertions(+), 46 deletions(-)
diff --git a/libc/test/src/__support/HashTable/CMakeLists.txt b/libc/test/src/__support/HashTable/CMakeLists.txt
index 263f63e453b0eff..ee8dde107c3fe70 100644
--- a/libc/test/src/__support/HashTable/CMakeLists.txt
+++ b/libc/test/src/__support/HashTable/CMakeLists.txt
@@ -7,6 +7,7 @@ add_libc_test(
DEPENDS
libc.src.__support.HashTable.bitmask
)
+
add_libc_test(
table_test
SUITE
@@ -19,48 +20,14 @@ add_libc_test(
libc.src.__support.common
UNIT_TEST_ONLY
)
-cpu_supports(CPU_SUPPORTS_SSE2 "SSE2")
-if (CPU_SUPPORTS_SSE2)
- add_libc_test(
- group_test_sse2
- SUITE
- libc-support-tests
- SRCS
- group_test.cpp
- DEPENDS
- libc.src.__support.HashTable.bitmask
- libc.src.stdlib.rand
- libc.src.string.memcmp
- COMPILE_OPTIONS
- ${LIBC_COMPILE_OPTIONS_NATIVE}
- UNIT_TEST_ONLY
- )
- add_libc_test(
- group_test
- SUITE
- libc-support-tests
- SRCS
- group_test.cpp
- DEPENDS
- libc.src.__support.HashTable.bitmask
- libc.src.stdlib.rand
- libc.src.string.memcmp
- COMPILE_OPTIONS
- -mno-sse2 -mtune=generic
- UNIT_TEST_ONLY
- )
-else()
- add_libc_test(
- group_test
- SUITE
- libc-support-tests
- SRCS
- group_test.cpp
- DEPENDS
- libc.src.__support.HashTable.bitmask
- libc.src.stdlib.rand
- libc.src.string.memcmp
- UNIT_TEST_ONLY
- )
- message(STATUS "Skipping test for libc.test.src.__support.HashTable.group_test_sse2: insufficient host cpu features 'SSE2'")
-endif()
+
+add_libc_test(
+ group_test
+ SUITE
+ libc-support-tests
+ SRCS
+ group_test.cpp
+ DEPENDS
+ libc.src.__support.HashTable.bitmask
+ libc.src.stdlib.rand
+)
diff --git a/libc/test/src/__support/HashTable/group_test.cpp b/libc/test/src/__support/HashTable/group_test.cpp
index 3926015d2184ec7..907908335863a84 100644
--- a/libc/test/src/__support/HashTable/group_test.cpp
+++ b/libc/test/src/__support/HashTable/group_test.cpp
@@ -9,7 +9,6 @@
#include "src/__support/HashTable/bitmask.h"
#include "src/stdlib/rand.h"
-#include "src/string/memcmp.h"
#include "test/UnitTest/Test.h"
#include <stdint.h>
>From 6e462e8935500a8453c5fd381fc6bcfdfbf34081 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Tue, 28 Nov 2023 14:26:50 -0500
Subject: [PATCH 8/8] add todo for DISABLE_SSE2_OPT
---
libc/src/__support/HashTable/CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/src/__support/HashTable/CMakeLists.txt b/libc/src/__support/HashTable/CMakeLists.txt
index 1128ca826851739..238d460dacd4285 100644
--- a/libc/src/__support/HashTable/CMakeLists.txt
+++ b/libc/src/__support/HashTable/CMakeLists.txt
@@ -1,3 +1,5 @@
+# TODO: `DISABLE_SSE2_OPT` does not quite work yet.
+# We will investigate a better way of feature flag control.
add_header_library(
bitmask
HDRS
More information about the libc-commits
mailing list