[libc-commits] [libc] [libc][c23] add memset_explicit (PR #83577)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Thu Mar 7 07:18:53 PST 2024
https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/83577
From 48658ba4591c3d2f40e8ce49104c4ac2fd72bb91 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 1 Mar 2024 10:01:18 -0500
Subject: [PATCH 1/6] [libc][c23] add memset_explicit
---
.../modules/LLVMLibCCheckCpuFeatures.cmake | 31 +++++++---
.../modules/cpu_features/check_CLFLUSHOPT.cpp | 6 ++
libc/config/linux/aarch64/entrypoints.txt | 1 +
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/spec/stdc.td | 5 ++
libc/src/string/CMakeLists.txt | 12 ++++
libc/src/string/memory_utils/CMakeLists.txt | 20 +++++++
.../memory_utils/aarch64/flush_cacheline.h | 29 ++++++++++
.../memory_utils/aarch64/inline_memset.h | 3 +-
libc/src/string/memory_utils/flush_cache.h | 57 +++++++++++++++++++
libc/src/string/memory_utils/inline_memset.h | 8 +++
.../memory_utils/x86_64/flush_cacheline.h | 27 +++++++++
libc/src/string/memset_explicit.cpp | 27 +++++++++
libc/src/string/memset_explicit.h | 20 +++++++
libc/test/src/string/CMakeLists.txt | 10 ++++
libc/test/src/string/memset_explicit_test.cpp | 31 ++++++++++
16 files changed, 280 insertions(+), 8 deletions(-)
create mode 100644 libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
create mode 100644 libc/src/string/memory_utils/aarch64/flush_cacheline.h
create mode 100644 libc/src/string/memory_utils/flush_cache.h
create mode 100644 libc/src/string/memory_utils/x86_64/flush_cacheline.h
create mode 100644 libc/src/string/memset_explicit.cpp
create mode 100644 libc/src/string/memset_explicit.h
create mode 100644 libc/test/src/string/memset_explicit_test.cpp
diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index 73b249374a0667..0225237cd7fa33 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -6,9 +6,11 @@
set(ALL_CPU_FEATURES "")
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
+ set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA CLFLUSHOPT)
+ set(CPU_FEATURES_DETECT_REQUIRES_RUN "CLFLUSHOPT")
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+ set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
endif()
@@ -53,12 +55,27 @@ else()
# Try compile a C file to check if flag is supported.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
foreach(feature IN LISTS ALL_CPU_FEATURES)
- try_compile(
- has_feature
- ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
- SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
- COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
- )
+ if (${feature} IN_LIST CPU_FEATURES_DETECT_REQUIRES_RUN)
+ try_run(
+ return_code
+ can_compile
+ ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+ ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+ COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+ )
+ if (can_compile AND return_code EQUAL 0)
+ set(has_feature TRUE)
+ else()
+ set(has_feature FALSE)
+ endif()
+ else()
+ try_compile(
+ has_feature
+ ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+ SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+ COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+ )
+ endif()
if(has_feature)
list(APPEND AVAILABLE_CPU_FEATURES ${feature})
endif()
diff --git a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
new file mode 100644
index 00000000000000..9088a9b2e95cb3
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
@@ -0,0 +1,6 @@
+void test(char *ptr) { asm volatile("clflushopt %0" : "+m"(*ptr)::"memory"); }
+
+int main(int argc, char **argv) {
+ test(argv[0]);
+ return 0;
+}
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 06832a41221dd8..c32773f67cda53 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.mempcpy
libc.src.string.memrchr
libc.src.string.memset
+ libc.src.string.memset_explicit
libc.src.string.rindex
libc.src.string.stpcpy
libc.src.string.stpncpy
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bc10512d942fa7..fef6a92d06aff1 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.mempcpy
libc.src.string.memrchr
libc.src.string.memset
+ libc.src.string.memset_explicit
libc.src.string.rindex
libc.src.string.stpcpy
libc.src.string.stpncpy
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 94ac62966f3ba5..c8f26eb1e07e0d 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -234,6 +234,11 @@ def StdC : StandardSpec<"stdc"> {
RetValSpec<VoidPtr>,
[ArgSpec<VoidPtr>, ArgSpec<IntType>, ArgSpec<SizeTType>]
>,
+ FunctionSpec<
+ "memset_explicit",
+ RetValSpec<VoidPtr>,
+ [ArgSpec<VoidPtr>, ArgSpec<IntType>, ArgSpec<SizeTType>]
+ >,
FunctionSpec<
"strcpy",
RetValSpec<CharPtr>,
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 1c893280e8a3c2..c169f9e99278e6 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -441,6 +441,18 @@ add_entrypoint_object(
.memory_utils.inline_memcpy
)
+add_entrypoint_object(
+ memset_explicit
+ SRCS
+ memset_explicit.cpp
+ HDRS
+ memset_explicit.h
+ DEPENDS
+ .string_utils
+ .memory_utils.inline_memset
+ .memory_utils.flush_cache
+)
+
# Helper to define a function with multiple implementations
# - Computes flags to satisfy required/rejected features and arch,
# - Declares an entry point,
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 08c0b0d34d5030..78b47a93bef73d 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -7,6 +7,7 @@ add_header_library(
aarch64/inline_memcpy.h
aarch64/inline_memmove.h
aarch64/inline_memset.h
+ aarch64/flush_cacheline.h
generic/aligned_access.h
generic/byte_per_byte.h
inline_bcmp.h
@@ -30,6 +31,7 @@ add_header_library(
x86_64/inline_memcpy.h
x86_64/inline_memmove.h
x86_64/inline_memset.h
+ x86_64/flush_cacheline.h
DEPENDS
libc.src.__support.common
libc.src.__support.CPP.bit
@@ -97,3 +99,21 @@ add_header_library(
HDRS
inline_memmem.h
)
+
+if (CLFLUSHOPT IN_LIST LIBC_CPU_FEATURES)
+ set(clflushopt_option "-DLIBC_TARGET_CPU_HAS_CLFLUSHOPT")
+ message(STATUS "Using clflushopt for cacheline flushing")
+else()
+ set(clflushopt_option "")
+endif()
+
+add_header_library(
+ flush_cache
+ HDRS
+ flush_cache.h
+ COMPILE_OPTIONS
+ ${clflushopt_option}
+ DEPENDS
+ .memory_utils
+ libc.src.__support.CPP.atomic
+)
diff --git a/libc/src/string/memory_utils/aarch64/flush_cacheline.h b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
new file mode 100644
index 00000000000000..5aaa58796bc437
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
@@ -0,0 +1,29 @@
+//===-- Flush Cacheline for AArch64 -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
+
+#include "src/__support/common.h"
+#include <stddef.h> // size_t
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE size_t cacheline_size() {
+ // Use the same way as in compiler-rt
+ size_t ctr_el0;
+ asm volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+ return 4 << ((ctr_el0 >> 16) & 15);
+}
+
+LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
+ // flush to external memory and invalidate the cache line
+ asm volatile("dc civac, %0" : : "r"(addr) : "memory");
+}
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index 91512acce6fc07..79d7aab278da44 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -17,6 +17,7 @@
namespace LIBC_NAMESPACE {
+template <bool OPAQUE_VALUE = false>
[[maybe_unused]] LIBC_INLINE static void
inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
static_assert(aarch64::kNeon, "aarch64 supports vector types");
@@ -45,7 +46,7 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
generic::Memset<uint256_t>::tail(dst, value, count);
return;
}
- if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+ if (!OPAQUE_VALUE && count >= 448 && value == 0 && aarch64::neon::hasZva()) {
generic::Memset<uint512_t>::block(dst, 0);
align_to_next_boundary<64>(dst, count);
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
diff --git a/libc/src/string/memory_utils/flush_cache.h b/libc/src/string/memory_utils/flush_cache.h
new file mode 100644
index 00000000000000..5bb2055752d9ba
--- /dev/null
+++ b/libc/src/string/memory_utils/flush_cache.h
@@ -0,0 +1,57 @@
+//===-- Dispatch cache flushing -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
+
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uintptr_t
+
+#ifdef LIBC_TARGET_ARCH_IS_X86
+#include "src/string/memory_utils/x86_64/flush_cacheline.h"
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/flush_cacheline.h"
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
+#else
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 0
+#endif
+
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE void flush_cache(volatile void *start, size_t size) {
+#if LIBC_HAS_FLUSH_CACHELINE_ASYNC
+ size_t line_size = cacheline_size();
+ uintptr_t addr = reinterpret_cast<uintptr_t>(start);
+ uintptr_t offset = addr % line_size;
+ // shift start to the left and align size to the right
+ // we want to cover the whole range of memory that needs to be flushed
+ size += offset;
+ size += line_size - (size % line_size);
+ addr -= offset;
+ // flush cache line async may be reordered. We need to put barriers.
+ cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+ for (size_t i = 0; i < size; i += line_size)
+ flush_cacheline_async(reinterpret_cast<volatile char *>(addr + i));
+ cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+#else
+ // we do not have specific instructions to flush the cache
+ // fallback to use a full memory barrier instead.
+ // Notice, however, memory fence might not flush the cache on many
+ // architectures.
+ cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
+#undef LIBC_HAS_FLUSH_CACHELINE_ASYNC
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index 1c07c1ca4bffc0..0dd698a325bb12 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -36,8 +36,16 @@
namespace LIBC_NAMESPACE {
+template <bool OPAQUE_VALUE = false>
LIBC_INLINE static void inline_memset(void *dst, uint8_t value, size_t count) {
+#if LIBC_TARGET_ARCH_IS_AARCH64
+ // The AArch64 implementation has an additional template parameter. It
+ // may uses dc zva to zero memory.
+ LIBC_SRC_STRING_MEMORY_UTILS_MEMSET<OPAQUE_VALUE>(reinterpret_cast<Ptr>(dst),
+ value, count);
+#else
LIBC_SRC_STRING_MEMORY_UTILS_MEMSET(reinterpret_cast<Ptr>(dst), value, count);
+#endif
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memory_utils/x86_64/flush_cacheline.h b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
new file mode 100644
index 00000000000000..db9229f69ba314
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
@@ -0,0 +1,27 @@
+//===-- Flush Cacheline for x86_64 ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
+
+#include "src/__support/common.h"
+#include <stddef.h> // size_t
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE constexpr size_t cacheline_size() { return 64; }
+
+LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
+#if defined(LIBC_TARGET_CPU_HAS_CLFLUSHOPT)
+ asm volatile("clflushopt %0" : "+m"(*addr)::"memory");
+#else
+ __builtin_ia32_clflush(const_cast<const char *>(addr));
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
new file mode 100644
index 00000000000000..0e58cb16c181f7
--- /dev/null
+++ b/libc/src/string/memset_explicit.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of memset_explicit ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memset_explicit.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/flush_cache.h"
+#include "src/string/memory_utils/inline_memset.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(void *, memset_explicit,
+ (void *dst, int value, size_t count)) {
+ // Use the inline memset function to set the memory.
+ inline_memset<true>(dst, static_cast<uint8_t>(value), count);
+
+ // Flush the cache line.
+ flush_cache(dst, count);
+
+ return dst;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h
new file mode 100644
index 00000000000000..c47880dbff1854
--- /dev/null
+++ b/libc/src/string/memset_explicit.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for memset_explicit ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
+#define LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
+
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE {
+
+void *memset_explicit(void *ptr, int value, size_t count);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 6088289532d771..c1caec5fd912c8 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -418,6 +418,16 @@ add_libc_test(
libc.src.string.strxfrm
)
+add_libc_test(
+ memset_explicit_test
+ SUITE
+ libc-string-tests
+ SRCS
+ memset_explicit_test.cpp
+ DEPENDS
+ libc.src.string.memset_explicit
+)
+
# Tests all implementations that can run on the target CPU.
function(add_libc_multi_impl_test name)
get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/libc/test/src/string/memset_explicit_test.cpp b/libc/test/src/string/memset_explicit_test.cpp
new file mode 100644
index 00000000000000..bb5111bd639e3a
--- /dev/null
+++ b/libc/test/src/string/memset_explicit_test.cpp
@@ -0,0 +1,31 @@
+//===-- Unittests for memset_explicit -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "memory_utils/memory_check_utils.h"
+#include "src/string/memset_explicit.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE {
+
+// Apply the same tests as memset
+
+static inline void Adaptor(cpp::span<char> p1, uint8_t value, size_t size) {
+ LIBC_NAMESPACE::memset_explicit(p1.begin(), value, size);
+}
+
+TEST(LlvmLibcmemsetExplicitTest, SizeSweep) {
+ static constexpr size_t kMaxSize = 400;
+ Buffer DstBuffer(kMaxSize);
+ for (size_t size = 0; size < kMaxSize; ++size) {
+ const char value = size % 10;
+ auto dst = DstBuffer.span().subspan(0, size);
+ ASSERT_TRUE((CheckMemset<Adaptor>(dst, value, size)));
+ }
+}
+
+} // namespace LIBC_NAMESPACE
From bb457ac3005998aaef3028225bc9670d92c57aa0 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 1 Mar 2024 10:22:34 -0500
Subject: [PATCH 2/6] add noinline attr
---
libc/src/string/memset_explicit.cpp | 4 ++--
libc/src/string/memset_explicit.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 0e58cb16c181f7..5ce2cb29cc4ac2 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -13,8 +13,8 @@
namespace LIBC_NAMESPACE {
-LLVM_LIBC_FUNCTION(void *, memset_explicit,
- (void *dst, int value, size_t count)) {
+[[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit,
+ (void *dst, int value, size_t count)) {
// Use the inline memset function to set the memory.
inline_memset<true>(dst, static_cast<uint8_t>(value), count);
diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h
index c47880dbff1854..f6c189761a123c 100644
--- a/libc/src/string/memset_explicit.h
+++ b/libc/src/string/memset_explicit.h
@@ -13,7 +13,7 @@
namespace LIBC_NAMESPACE {
-void *memset_explicit(void *ptr, int value, size_t count);
+[[gnu::noinline]] void *memset_explicit(void *ptr, int value, size_t count);
} // namespace LIBC_NAMESPACE
From aa12ab7877c5106919552ab4f3ed89c92dbfd2ce Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 09:29:35 -0500
Subject: [PATCH 3/6] address reviews
---
.../modules/LLVMLibCCheckCpuFeatures.cmake | 30 +++-------
libc/src/string/CMakeLists.txt | 1 -
libc/src/string/memory_utils/CMakeLists.txt | 20 -------
.../memory_utils/aarch64/flush_cacheline.h | 29 ----------
libc/src/string/memory_utils/flush_cache.h | 57 -------------------
.../memory_utils/x86_64/flush_cacheline.h | 27 ---------
libc/src/string/memset_explicit.cpp | 8 +--
7 files changed, 10 insertions(+), 162 deletions(-)
delete mode 100644 libc/src/string/memory_utils/aarch64/flush_cacheline.h
delete mode 100644 libc/src/string/memory_utils/flush_cache.h
delete mode 100644 libc/src/string/memory_utils/x86_64/flush_cacheline.h
diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index 0225237cd7fa33..fe09c938a748ba 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -6,8 +6,7 @@
set(ALL_CPU_FEATURES "")
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
- set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA CLFLUSHOPT)
- set(CPU_FEATURES_DETECT_REQUIRES_RUN "CLFLUSHOPT")
+ set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
@@ -55,27 +54,12 @@ else()
# Try compile a C file to check if flag is supported.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
foreach(feature IN LISTS ALL_CPU_FEATURES)
- if (${feature} IN_LIST CPU_FEATURES_DETECT_REQUIRES_RUN)
- try_run(
- return_code
- can_compile
- ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
- ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
- COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
- )
- if (can_compile AND return_code EQUAL 0)
- set(has_feature TRUE)
- else()
- set(has_feature FALSE)
- endif()
- else()
- try_compile(
- has_feature
- ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
- SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
- COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
- )
- endif()
+ try_compile(
+ has_feature
+ ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+ SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+ COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+ )
if(has_feature)
list(APPEND AVAILABLE_CPU_FEATURES ${feature})
endif()
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index c169f9e99278e6..56588ffafb86f0 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -450,7 +450,6 @@ add_entrypoint_object(
DEPENDS
.string_utils
.memory_utils.inline_memset
- .memory_utils.flush_cache
)
# Helper to define a function with multiple implementations
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 78b47a93bef73d..08c0b0d34d5030 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -7,7 +7,6 @@ add_header_library(
aarch64/inline_memcpy.h
aarch64/inline_memmove.h
aarch64/inline_memset.h
- aarch64/flush_cacheline.h
generic/aligned_access.h
generic/byte_per_byte.h
inline_bcmp.h
@@ -31,7 +30,6 @@ add_header_library(
x86_64/inline_memcpy.h
x86_64/inline_memmove.h
x86_64/inline_memset.h
- x86_64/flush_cacheline.h
DEPENDS
libc.src.__support.common
libc.src.__support.CPP.bit
@@ -99,21 +97,3 @@ add_header_library(
HDRS
inline_memmem.h
)
-
-if (CLFLUSHOPT IN_LIST LIBC_CPU_FEATURES)
- set(clflushopt_option "-DLIBC_TARGET_CPU_HAS_CLFLUSHOPT")
- message(STATUS "Using clflushopt for cacheline flushing")
-else()
- set(clflushopt_option "")
-endif()
-
-add_header_library(
- flush_cache
- HDRS
- flush_cache.h
- COMPILE_OPTIONS
- ${clflushopt_option}
- DEPENDS
- .memory_utils
- libc.src.__support.CPP.atomic
-)
diff --git a/libc/src/string/memory_utils/aarch64/flush_cacheline.h b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
deleted file mode 100644
index 5aaa58796bc437..00000000000000
--- a/libc/src/string/memory_utils/aarch64/flush_cacheline.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Flush Cacheline for AArch64 -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
-
-#include "src/__support/common.h"
-#include <stddef.h> // size_t
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE size_t cacheline_size() {
- // Use the same way as in compiler-rt
- size_t ctr_el0;
- asm volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
- return 4 << ((ctr_el0 >> 16) & 15);
-}
-
-LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
- // flush to external memory and invalidate the cache line
- asm volatile("dc civac, %0" : : "r"(addr) : "memory");
-}
-
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memory_utils/flush_cache.h b/libc/src/string/memory_utils/flush_cache.h
deleted file mode 100644
index 5bb2055752d9ba..00000000000000
--- a/libc/src/string/memory_utils/flush_cache.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- Dispatch cache flushing -------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
-
-#include "src/__support/CPP/atomic.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
-
-#include <stddef.h> // size_t
-#include <stdint.h> // uintptr_t
-
-#ifdef LIBC_TARGET_ARCH_IS_X86
-#include "src/string/memory_utils/x86_64/flush_cacheline.h"
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-#include "src/string/memory_utils/aarch64/flush_cacheline.h"
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
-#else
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 0
-#endif
-
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE void flush_cache(volatile void *start, size_t size) {
-#if LIBC_HAS_FLUSH_CACHELINE_ASYNC
- size_t line_size = cacheline_size();
- uintptr_t addr = reinterpret_cast<uintptr_t>(start);
- uintptr_t offset = addr % line_size;
- // shift start to the left and align size to the right
- // we want to cover the whole range of memory that needs to be flushed
- size += offset;
- size += line_size - (size % line_size);
- addr -= offset;
- // flush cache line async may be reordered. We need to put barriers.
- cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
- for (size_t i = 0; i < size; i += line_size)
- flush_cacheline_async(reinterpret_cast<volatile char *>(addr + i));
- cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
-#else
- // we do not have specific instructions to flush the cache
- // fallback to use a full memory barrier instead.
- // Notice, however, memory fence might not flush the cache on many
- // architectures.
- cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
-#endif
-}
-
-} // namespace LIBC_NAMESPACE
-#undef LIBC_HAS_FLUSH_CACHELINE_ASYNC
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
diff --git a/libc/src/string/memory_utils/x86_64/flush_cacheline.h b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
deleted file mode 100644
index db9229f69ba314..00000000000000
--- a/libc/src/string/memory_utils/x86_64/flush_cacheline.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- Flush Cacheline for x86_64 ------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
-
-#include "src/__support/common.h"
-#include <stddef.h> // size_t
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE constexpr size_t cacheline_size() { return 64; }
-
-LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
-#if defined(LIBC_TARGET_CPU_HAS_CLFLUSHOPT)
- asm volatile("clflushopt %0" : "+m"(*addr)::"memory");
-#else
- __builtin_ia32_clflush(const_cast<const char *>(addr));
-#endif
-}
-
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 5ce2cb29cc4ac2..3b641e297d5b14 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -8,7 +8,6 @@
#include "src/string/memset_explicit.h"
#include "src/__support/common.h"
-#include "src/string/memory_utils/flush_cache.h"
#include "src/string/memory_utils/inline_memset.h"
namespace LIBC_NAMESPACE {
@@ -17,10 +16,9 @@ namespace LIBC_NAMESPACE {
(void *dst, int value, size_t count)) {
// Use the inline memset function to set the memory.
inline_memset<true>(dst, static_cast<uint8_t>(value), count);
-
- // Flush the cache line.
- flush_cache(dst, count);
-
+ // avoid dead store elimination
+ // The asm itself should also be sufficient to behave as a compiler barrier.
+ asm volatile("" : : "r"(dst) : "memory");
return dst;
}
From c4d3ce22bcbf6421ff3e28ce49fe3b2190bd31f1 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 09:32:35 -0500
Subject: [PATCH 4/6] remove extra code
---
libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake | 1 -
libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp | 6 ------
2 files changed, 7 deletions(-)
delete mode 100644 libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index fe09c938a748ba..73b249374a0667 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -9,7 +9,6 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
- set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
endif()
diff --git a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
deleted file mode 100644
index 9088a9b2e95cb3..00000000000000
--- a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-void test(char *ptr) { asm volatile("clflushopt %0" : "+m"(*ptr)::"memory"); }
-
-int main(int argc, char **argv) {
- test(argv[0]);
- return 0;
-}
From ff5fb31deae626c39395ddf8017890637ff7db7a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 16:23:26 -0500
Subject: [PATCH 5/6] undo aarch64 changes
---
libc/src/string/memory_utils/aarch64/inline_memset.h | 3 +--
libc/src/string/memory_utils/inline_memset.h | 8 --------
libc/src/string/memset_explicit.cpp | 2 +-
3 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index 79d7aab278da44..91512acce6fc07 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -17,7 +17,6 @@
namespace LIBC_NAMESPACE {
-template <bool OPAQUE_VALUE = false>
[[maybe_unused]] LIBC_INLINE static void
inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
static_assert(aarch64::kNeon, "aarch64 supports vector types");
@@ -46,7 +45,7 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
generic::Memset<uint256_t>::tail(dst, value, count);
return;
}
- if (!OPAQUE_VALUE && count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+ if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
generic::Memset<uint512_t>::block(dst, 0);
align_to_next_boundary<64>(dst, count);
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index 0dd698a325bb12..1c07c1ca4bffc0 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -36,16 +36,8 @@
namespace LIBC_NAMESPACE {
-template <bool OPAQUE_VALUE = false>
LIBC_INLINE static void inline_memset(void *dst, uint8_t value, size_t count) {
-#if LIBC_TARGET_ARCH_IS_AARCH64
- // The AArch64 implementation has an additional template parameter. It
- // may uses dc zva to zero memory.
- LIBC_SRC_STRING_MEMORY_UTILS_MEMSET<OPAQUE_VALUE>(reinterpret_cast<Ptr>(dst),
- value, count);
-#else
LIBC_SRC_STRING_MEMORY_UTILS_MEMSET(reinterpret_cast<Ptr>(dst), value, count);
-#endif
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 3b641e297d5b14..348dad293aee81 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -18,7 +18,7 @@ namespace LIBC_NAMESPACE {
inline_memset<true>(dst, static_cast<uint8_t>(value), count);
// avoid dead store elimination
// The asm itself should also be sufficient to behave as a compiler barrier.
- asm volatile("" : : "r"(dst) : "memory");
+ asm("" : : "r"(dst) : "memory");
return dst;
}
From 364bb4b6a24555e407e217c0c30cb7cca4e9379a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Thu, 7 Mar 2024 10:18:39 -0500
Subject: [PATCH 6/6] remove template arg
---
libc/src/string/memset_explicit.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 348dad293aee81..a8656d1e791e84 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE {
[[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit,
(void *dst, int value, size_t count)) {
// Use the inline memset function to set the memory.
- inline_memset<true>(dst, static_cast<uint8_t>(value), count);
+ inline_memset(dst, static_cast<uint8_t>(value), count);
// avoid dead store elimination
// The asm itself should also be sufficient to behave as a compiler barrier.
asm("" : : "r"(dst) : "memory");
More information about the libc-commits
mailing list