[libc-commits] [libc] [libc][c23] add memset_explicit (PR #83577)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Thu Mar 7 07:18:53 PST 2024


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/83577

>From 48658ba4591c3d2f40e8ce49104c4ac2fd72bb91 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 1 Mar 2024 10:01:18 -0500
Subject: [PATCH 1/6] [libc][c23] add memset_explicit

---
 .../modules/LLVMLibCCheckCpuFeatures.cmake    | 31 +++++++---
 .../modules/cpu_features/check_CLFLUSHOPT.cpp |  6 ++
 libc/config/linux/aarch64/entrypoints.txt     |  1 +
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/spec/stdc.td                             |  5 ++
 libc/src/string/CMakeLists.txt                | 12 ++++
 libc/src/string/memory_utils/CMakeLists.txt   | 20 +++++++
 .../memory_utils/aarch64/flush_cacheline.h    | 29 ++++++++++
 .../memory_utils/aarch64/inline_memset.h      |  3 +-
 libc/src/string/memory_utils/flush_cache.h    | 57 +++++++++++++++++++
 libc/src/string/memory_utils/inline_memset.h  |  8 +++
 .../memory_utils/x86_64/flush_cacheline.h     | 27 +++++++++
 libc/src/string/memset_explicit.cpp           | 27 +++++++++
 libc/src/string/memset_explicit.h             | 20 +++++++
 libc/test/src/string/CMakeLists.txt           | 10 ++++
 libc/test/src/string/memset_explicit_test.cpp | 31 ++++++++++
 16 files changed, 280 insertions(+), 8 deletions(-)
 create mode 100644 libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
 create mode 100644 libc/src/string/memory_utils/aarch64/flush_cacheline.h
 create mode 100644 libc/src/string/memory_utils/flush_cache.h
 create mode 100644 libc/src/string/memory_utils/x86_64/flush_cacheline.h
 create mode 100644 libc/src/string/memset_explicit.cpp
 create mode 100644 libc/src/string/memset_explicit.h
 create mode 100644 libc/test/src/string/memset_explicit_test.cpp

diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index 73b249374a0667..0225237cd7fa33 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -6,9 +6,11 @@
 set(ALL_CPU_FEATURES "")
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
+  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA CLFLUSHOPT)
+  set(CPU_FEATURES_DETECT_REQUIRES_RUN "CLFLUSHOPT")
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+  set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
   set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 endif()
 
@@ -53,12 +55,27 @@ else()
   # Try compile a C file to check if flag is supported.
   set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
   foreach(feature IN LISTS ALL_CPU_FEATURES)
-    try_compile(
-      has_feature
-      ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
-      SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
-      COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
-    )
+    if (${feature} IN_LIST CPU_FEATURES_DETECT_REQUIRES_RUN)
+      try_run(
+        return_code
+        can_compile
+        ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+        ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+        COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+      )
+      if (can_compile AND return_code EQUAL 0)
+        set(has_feature TRUE)
+      else()
+        set(has_feature FALSE)
+      endif()
+    else()
+      try_compile(
+        has_feature
+        ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+        SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+        COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+      )
+    endif()
     if(has_feature)
       list(APPEND AVAILABLE_CPU_FEATURES ${feature})
     endif()
diff --git a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
new file mode 100644
index 00000000000000..9088a9b2e95cb3
--- /dev/null
+++ b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
@@ -0,0 +1,6 @@
+void test(char *ptr) { asm volatile("clflushopt %0" : "+m"(*ptr)::"memory"); }
+
+int main(int argc, char **argv) {
+  test(argv[0]);
+  return 0;
+}
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 06832a41221dd8..c32773f67cda53 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.mempcpy
     libc.src.string.memrchr
     libc.src.string.memset
+    libc.src.string.memset_explicit
     libc.src.string.rindex
     libc.src.string.stpcpy
     libc.src.string.stpncpy
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bc10512d942fa7..fef6a92d06aff1 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -51,6 +51,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.mempcpy
     libc.src.string.memrchr
     libc.src.string.memset
+    libc.src.string.memset_explicit
     libc.src.string.rindex
     libc.src.string.stpcpy
     libc.src.string.stpncpy
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 94ac62966f3ba5..c8f26eb1e07e0d 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -234,6 +234,11 @@ def StdC : StandardSpec<"stdc"> {
               RetValSpec<VoidPtr>,
               [ArgSpec<VoidPtr>, ArgSpec<IntType>, ArgSpec<SizeTType>]
           >,
+          FunctionSpec<
+              "memset_explicit",
+              RetValSpec<VoidPtr>,
+              [ArgSpec<VoidPtr>, ArgSpec<IntType>, ArgSpec<SizeTType>]
+          >,
           FunctionSpec<
               "strcpy",
               RetValSpec<CharPtr>,
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 1c893280e8a3c2..c169f9e99278e6 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -441,6 +441,18 @@ add_entrypoint_object(
     .memory_utils.inline_memcpy
 )
 
+add_entrypoint_object(
+  memset_explicit
+  SRCS
+    memset_explicit.cpp
+  HDRS
+    memset_explicit.h
+  DEPENDS
+    .string_utils
+    .memory_utils.inline_memset
+    .memory_utils.flush_cache
+)
+
 # Helper to define a function with multiple implementations
 # - Computes flags to satisfy required/rejected features and arch,
 # - Declares an entry point,
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 08c0b0d34d5030..78b47a93bef73d 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -7,6 +7,7 @@ add_header_library(
     aarch64/inline_memcpy.h
     aarch64/inline_memmove.h
     aarch64/inline_memset.h
+    aarch64/flush_cacheline.h
     generic/aligned_access.h
     generic/byte_per_byte.h
     inline_bcmp.h
@@ -30,6 +31,7 @@ add_header_library(
     x86_64/inline_memcpy.h
     x86_64/inline_memmove.h
     x86_64/inline_memset.h
+    x86_64/flush_cacheline.h
   DEPENDS
     libc.src.__support.common
     libc.src.__support.CPP.bit
@@ -97,3 +99,21 @@ add_header_library(
   HDRS
     inline_memmem.h
 )
+
+if (CLFLUSHOPT IN_LIST LIBC_CPU_FEATURES)
+  set(clflushopt_option "-DLIBC_TARGET_CPU_HAS_CLFLUSHOPT")
+  message(STATUS "Using clflushopt for cacheline flushing")
+else()
+  set(clflushopt_option "")
+endif()
+
+add_header_library(
+  flush_cache
+  HDRS
+    flush_cache.h
+  COMPILE_OPTIONS
+    ${clflushopt_option}
+  DEPENDS
+    .memory_utils
+    libc.src.__support.CPP.atomic
+)
diff --git a/libc/src/string/memory_utils/aarch64/flush_cacheline.h b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
new file mode 100644
index 00000000000000..5aaa58796bc437
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
@@ -0,0 +1,29 @@
+//===-- Flush Cacheline for AArch64 -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
+
+#include "src/__support/common.h"
+#include <stddef.h> // size_t
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE size_t cacheline_size() {
+  // Use the same way as in compiler-rt
+  size_t ctr_el0;
+  asm volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+  return 4 << ((ctr_el0 >> 16) & 15);
+}
+
+LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
+  // flush to external memory and invalidate the cache line
+  asm volatile("dc civac, %0" : : "r"(addr) : "memory");
+}
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index 91512acce6fc07..79d7aab278da44 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -17,6 +17,7 @@
 
 namespace LIBC_NAMESPACE {
 
+template <bool OPAQUE_VALUE = false>
 [[maybe_unused]] LIBC_INLINE static void
 inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
   static_assert(aarch64::kNeon, "aarch64 supports vector types");
@@ -45,7 +46,7 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
     generic::Memset<uint256_t>::tail(dst, value, count);
     return;
   }
-  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+  if (!OPAQUE_VALUE && count >= 448 && value == 0 && aarch64::neon::hasZva()) {
     generic::Memset<uint512_t>::block(dst, 0);
     align_to_next_boundary<64>(dst, count);
     return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
diff --git a/libc/src/string/memory_utils/flush_cache.h b/libc/src/string/memory_utils/flush_cache.h
new file mode 100644
index 00000000000000..5bb2055752d9ba
--- /dev/null
+++ b/libc/src/string/memory_utils/flush_cache.h
@@ -0,0 +1,57 @@
+//===-- Dispatch cache flushing -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
+
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uintptr_t
+
+#ifdef LIBC_TARGET_ARCH_IS_X86
+#include "src/string/memory_utils/x86_64/flush_cacheline.h"
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/flush_cacheline.h"
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
+#else
+#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 0
+#endif
+
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE void flush_cache(volatile void *start, size_t size) {
+#if LIBC_HAS_FLUSH_CACHELINE_ASYNC
+  size_t line_size = cacheline_size();
+  uintptr_t addr = reinterpret_cast<uintptr_t>(start);
+  uintptr_t offset = addr % line_size;
+  // shift start to the left and align size to the right
+  // we want to cover the whole range of memory that needs to be flushed
+  size += offset;
+  size += line_size - (size % line_size);
+  addr -= offset;
+  // flush cache line async may be reordered. We need to put barriers.
+  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+  for (size_t i = 0; i < size; i += line_size)
+    flush_cacheline_async(reinterpret_cast<volatile char *>(addr + i));
+  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+#else
+  // we do not have specific instructions to flush the cache
+  // fallback to use a full memory barrier instead.
+  // Notice, however, memory fence might not flush the cache on many
+  // architectures.
+  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
+#undef LIBC_HAS_FLUSH_CACHELINE_ASYNC
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index 1c07c1ca4bffc0..0dd698a325bb12 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -36,8 +36,16 @@
 
 namespace LIBC_NAMESPACE {
 
+template <bool OPAQUE_VALUE = false>
 LIBC_INLINE static void inline_memset(void *dst, uint8_t value, size_t count) {
+#if LIBC_TARGET_ARCH_IS_AARCH64
+  // The AArch64 implementation has an additional template parameter. It
+  // may uses dc zva to zero memory.
+  LIBC_SRC_STRING_MEMORY_UTILS_MEMSET<OPAQUE_VALUE>(reinterpret_cast<Ptr>(dst),
+                                                    value, count);
+#else
   LIBC_SRC_STRING_MEMORY_UTILS_MEMSET(reinterpret_cast<Ptr>(dst), value, count);
+#endif
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memory_utils/x86_64/flush_cacheline.h b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
new file mode 100644
index 00000000000000..db9229f69ba314
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
@@ -0,0 +1,27 @@
+//===-- Flush Cacheline for x86_64 ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
+
+#include "src/__support/common.h"
+#include <stddef.h> // size_t
+namespace LIBC_NAMESPACE {
+
+LIBC_INLINE constexpr size_t cacheline_size() { return 64; }
+
+LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
+#if defined(LIBC_TARGET_CPU_HAS_CLFLUSHOPT)
+  asm volatile("clflushopt %0" : "+m"(*addr)::"memory");
+#else
+  __builtin_ia32_clflush(const_cast<const char *>(addr));
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
new file mode 100644
index 00000000000000..0e58cb16c181f7
--- /dev/null
+++ b/libc/src/string/memset_explicit.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of memset_explicit ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memset_explicit.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/flush_cache.h"
+#include "src/string/memory_utils/inline_memset.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(void *, memset_explicit,
+                   (void *dst, int value, size_t count)) {
+  // Use the inline memset function to set the memory.
+  inline_memset<true>(dst, static_cast<uint8_t>(value), count);
+
+  // Flush the cache line.
+  flush_cache(dst, count);
+
+  return dst;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h
new file mode 100644
index 00000000000000..c47880dbff1854
--- /dev/null
+++ b/libc/src/string/memset_explicit.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for memset_explicit ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
+#define LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
+
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE {
+
+void *memset_explicit(void *ptr, int value, size_t count);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STRING_MEMSET_EXPLICIT_H
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 6088289532d771..c1caec5fd912c8 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -418,6 +418,16 @@ add_libc_test(
     libc.src.string.strxfrm
 )
 
+add_libc_test(
+  memset_explicit_test
+  SUITE
+    libc-string-tests
+  SRCS
+    memset_explicit_test.cpp
+  DEPENDS
+    libc.src.string.memset_explicit
+)
+
 # Tests all implementations that can run on the target CPU.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/libc/test/src/string/memset_explicit_test.cpp b/libc/test/src/string/memset_explicit_test.cpp
new file mode 100644
index 00000000000000..bb5111bd639e3a
--- /dev/null
+++ b/libc/test/src/string/memset_explicit_test.cpp
@@ -0,0 +1,31 @@
+//===-- Unittests for memset_explicit -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "memory_utils/memory_check_utils.h"
+#include "src/string/memset_explicit.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE {
+
+// Apply the same tests as memset
+
+static inline void Adaptor(cpp::span<char> p1, uint8_t value, size_t size) {
+  LIBC_NAMESPACE::memset_explicit(p1.begin(), value, size);
+}
+
+TEST(LlvmLibcmemsetExplicitTest, SizeSweep) {
+  static constexpr size_t kMaxSize = 400;
+  Buffer DstBuffer(kMaxSize);
+  for (size_t size = 0; size < kMaxSize; ++size) {
+    const char value = size % 10;
+    auto dst = DstBuffer.span().subspan(0, size);
+    ASSERT_TRUE((CheckMemset<Adaptor>(dst, value, size)));
+  }
+}
+
+} // namespace LIBC_NAMESPACE

>From bb457ac3005998aaef3028225bc9670d92c57aa0 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 1 Mar 2024 10:22:34 -0500
Subject: [PATCH 2/6] add noinline attr

---
 libc/src/string/memset_explicit.cpp | 4 ++--
 libc/src/string/memset_explicit.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 0e58cb16c181f7..5ce2cb29cc4ac2 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -13,8 +13,8 @@
 
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(void *, memset_explicit,
-                   (void *dst, int value, size_t count)) {
+[[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit,
+                                     (void *dst, int value, size_t count)) {
   // Use the inline memset function to set the memory.
   inline_memset<true>(dst, static_cast<uint8_t>(value), count);
 
diff --git a/libc/src/string/memset_explicit.h b/libc/src/string/memset_explicit.h
index c47880dbff1854..f6c189761a123c 100644
--- a/libc/src/string/memset_explicit.h
+++ b/libc/src/string/memset_explicit.h
@@ -13,7 +13,7 @@
 
 namespace LIBC_NAMESPACE {
 
-void *memset_explicit(void *ptr, int value, size_t count);
+[[gnu::noinline]] void *memset_explicit(void *ptr, int value, size_t count);
 
 } // namespace LIBC_NAMESPACE
 

>From aa12ab7877c5106919552ab4f3ed89c92dbfd2ce Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 09:29:35 -0500
Subject: [PATCH 3/6] address reviews

---
 .../modules/LLVMLibCCheckCpuFeatures.cmake    | 30 +++-------
 libc/src/string/CMakeLists.txt                |  1 -
 libc/src/string/memory_utils/CMakeLists.txt   | 20 -------
 .../memory_utils/aarch64/flush_cacheline.h    | 29 ----------
 libc/src/string/memory_utils/flush_cache.h    | 57 -------------------
 .../memory_utils/x86_64/flush_cacheline.h     | 27 ---------
 libc/src/string/memset_explicit.cpp           |  8 +--
 7 files changed, 10 insertions(+), 162 deletions(-)
 delete mode 100644 libc/src/string/memory_utils/aarch64/flush_cacheline.h
 delete mode 100644 libc/src/string/memory_utils/flush_cache.h
 delete mode 100644 libc/src/string/memory_utils/x86_64/flush_cacheline.h

diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index 0225237cd7fa33..fe09c938a748ba 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -6,8 +6,7 @@
 set(ALL_CPU_FEATURES "")
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
-  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA CLFLUSHOPT)
-  set(CPU_FEATURES_DETECT_REQUIRES_RUN "CLFLUSHOPT")
+  set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
   set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
@@ -55,27 +54,12 @@ else()
   # Try compile a C file to check if flag is supported.
   set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
   foreach(feature IN LISTS ALL_CPU_FEATURES)
-    if (${feature} IN_LIST CPU_FEATURES_DETECT_REQUIRES_RUN)
-      try_run(
-        return_code
-        can_compile
-        ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
-        ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
-        COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
-      )
-      if (can_compile AND return_code EQUAL 0)
-        set(has_feature TRUE)
-      else()
-        set(has_feature FALSE)
-      endif()
-    else()
-      try_compile(
-        has_feature
-        ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
-        SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
-        COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
-      )
-    endif()
+    try_compile(
+      has_feature
+      ${CMAKE_CURRENT_BINARY_DIR}/cpu_features
+      SOURCES ${LIBC_SOURCE_DIR}/cmake/modules/cpu_features/check_${feature}.cpp
+      COMPILE_DEFINITIONS -I${LIBC_SOURCE_DIR} ${LIBC_COMPILE_OPTIONS_NATIVE}
+    )
     if(has_feature)
       list(APPEND AVAILABLE_CPU_FEATURES ${feature})
     endif()
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index c169f9e99278e6..56588ffafb86f0 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -450,7 +450,6 @@ add_entrypoint_object(
   DEPENDS
     .string_utils
     .memory_utils.inline_memset
-    .memory_utils.flush_cache
 )
 
 # Helper to define a function with multiple implementations
diff --git a/libc/src/string/memory_utils/CMakeLists.txt b/libc/src/string/memory_utils/CMakeLists.txt
index 78b47a93bef73d..08c0b0d34d5030 100644
--- a/libc/src/string/memory_utils/CMakeLists.txt
+++ b/libc/src/string/memory_utils/CMakeLists.txt
@@ -7,7 +7,6 @@ add_header_library(
     aarch64/inline_memcpy.h
     aarch64/inline_memmove.h
     aarch64/inline_memset.h
-    aarch64/flush_cacheline.h
     generic/aligned_access.h
     generic/byte_per_byte.h
     inline_bcmp.h
@@ -31,7 +30,6 @@ add_header_library(
     x86_64/inline_memcpy.h
     x86_64/inline_memmove.h
     x86_64/inline_memset.h
-    x86_64/flush_cacheline.h
   DEPENDS
     libc.src.__support.common
     libc.src.__support.CPP.bit
@@ -99,21 +97,3 @@ add_header_library(
   HDRS
     inline_memmem.h
 )
-
-if (CLFLUSHOPT IN_LIST LIBC_CPU_FEATURES)
-  set(clflushopt_option "-DLIBC_TARGET_CPU_HAS_CLFLUSHOPT")
-  message(STATUS "Using clflushopt for cacheline flushing")
-else()
-  set(clflushopt_option "")
-endif()
-
-add_header_library(
-  flush_cache
-  HDRS
-    flush_cache.h
-  COMPILE_OPTIONS
-    ${clflushopt_option}
-  DEPENDS
-    .memory_utils
-    libc.src.__support.CPP.atomic
-)
diff --git a/libc/src/string/memory_utils/aarch64/flush_cacheline.h b/libc/src/string/memory_utils/aarch64/flush_cacheline.h
deleted file mode 100644
index 5aaa58796bc437..00000000000000
--- a/libc/src/string/memory_utils/aarch64/flush_cacheline.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Flush Cacheline for AArch64 -----------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
-
-#include "src/__support/common.h"
-#include <stddef.h> // size_t
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE size_t cacheline_size() {
-  // Use the same way as in compiler-rt
-  size_t ctr_el0;
-  asm volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
-  return 4 << ((ctr_el0 >> 16) & 15);
-}
-
-LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
-  // flush to external memory and invalidate the cache line
-  asm volatile("dc civac, %0" : : "r"(addr) : "memory");
-}
-
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memory_utils/flush_cache.h b/libc/src/string/memory_utils/flush_cache.h
deleted file mode 100644
index 5bb2055752d9ba..00000000000000
--- a/libc/src/string/memory_utils/flush_cache.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- Dispatch cache flushing -------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
-
-#include "src/__support/CPP/atomic.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
-
-#include <stddef.h> // size_t
-#include <stdint.h> // uintptr_t
-
-#ifdef LIBC_TARGET_ARCH_IS_X86
-#include "src/string/memory_utils/x86_64/flush_cacheline.h"
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-#include "src/string/memory_utils/aarch64/flush_cacheline.h"
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 1
-#else
-#define LIBC_HAS_FLUSH_CACHELINE_ASYNC 0
-#endif
-
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE void flush_cache(volatile void *start, size_t size) {
-#if LIBC_HAS_FLUSH_CACHELINE_ASYNC
-  size_t line_size = cacheline_size();
-  uintptr_t addr = reinterpret_cast<uintptr_t>(start);
-  uintptr_t offset = addr % line_size;
-  // shift start to the left and align size to the right
-  // we want to cover the whole range of memory that needs to be flushed
-  size += offset;
-  size += line_size - (size % line_size);
-  addr -= offset;
-  // flush cache line async may be reordered. We need to put barriers.
-  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
-  for (size_t i = 0; i < size; i += line_size)
-    flush_cacheline_async(reinterpret_cast<volatile char *>(addr + i));
-  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
-#else
-  // we do not have specific instructions to flush the cache
-  // fallback to use a full memory barrier instead.
-  // Notice, however, memory fence might not flush the cache on many
-  // architectures.
-  cpp::atomic_thread_fence(cpp::MemoryOrder::SEQ_CST);
-#endif
-}
-
-} // namespace LIBC_NAMESPACE
-#undef LIBC_HAS_FLUSH_CACHELINE_ASYNC
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_FLUSH_CACHE_H
diff --git a/libc/src/string/memory_utils/x86_64/flush_cacheline.h b/libc/src/string/memory_utils/x86_64/flush_cacheline.h
deleted file mode 100644
index db9229f69ba314..00000000000000
--- a/libc/src/string/memory_utils/x86_64/flush_cacheline.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- Flush Cacheline for x86_64 ------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
-#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
-
-#include "src/__support/common.h"
-#include <stddef.h> // size_t
-namespace LIBC_NAMESPACE {
-
-LIBC_INLINE constexpr size_t cacheline_size() { return 64; }
-
-LIBC_INLINE void flush_cacheline_async(volatile char *addr) {
-#if defined(LIBC_TARGET_CPU_HAS_CLFLUSHOPT)
-  asm volatile("clflushopt %0" : "+m"(*addr)::"memory");
-#else
-  __builtin_ia32_clflush(const_cast<const char *>(addr));
-#endif
-}
-
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_FLUSH_CACHELINE_H
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 5ce2cb29cc4ac2..3b641e297d5b14 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -8,7 +8,6 @@
 
 #include "src/string/memset_explicit.h"
 #include "src/__support/common.h"
-#include "src/string/memory_utils/flush_cache.h"
 #include "src/string/memory_utils/inline_memset.h"
 
 namespace LIBC_NAMESPACE {
@@ -17,10 +16,9 @@ namespace LIBC_NAMESPACE {
                                      (void *dst, int value, size_t count)) {
   // Use the inline memset function to set the memory.
   inline_memset<true>(dst, static_cast<uint8_t>(value), count);
-
-  // Flush the cache line.
-  flush_cache(dst, count);
-
+  // avoid dead store elimination
+  // The asm itself should also be sufficient to behave as a compiler barrier.
+  asm volatile("" : : "r"(dst) : "memory");
   return dst;
 }
 

>From c4d3ce22bcbf6421ff3e28ce49fe3b2190bd31f1 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 09:32:35 -0500
Subject: [PATCH 4/6] remove extra code

---
 libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake    | 1 -
 libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp | 6 ------
 2 files changed, 7 deletions(-)
 delete mode 100644 libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp

diff --git a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
index fe09c938a748ba..73b249374a0667 100644
--- a/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckCpuFeatures.cmake
@@ -9,7 +9,6 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
   set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX AVX2 AVX512F AVX512BW FMA)
   set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
 elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
-  set(CPU_FEATURES_DETECT_REQUIRES_RUN "")
   set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
 endif()
 
diff --git a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp b/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
deleted file mode 100644
index 9088a9b2e95cb3..00000000000000
--- a/libc/cmake/modules/cpu_features/check_CLFLUSHOPT.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-void test(char *ptr) { asm volatile("clflushopt %0" : "+m"(*ptr)::"memory"); }
-
-int main(int argc, char **argv) {
-  test(argv[0]);
-  return 0;
-}

>From ff5fb31deae626c39395ddf8017890637ff7db7a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 6 Mar 2024 16:23:26 -0500
Subject: [PATCH 5/6] undo aarch64 changes

---
 libc/src/string/memory_utils/aarch64/inline_memset.h | 3 +--
 libc/src/string/memory_utils/inline_memset.h         | 8 --------
 libc/src/string/memset_explicit.cpp                  | 2 +-
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/libc/src/string/memory_utils/aarch64/inline_memset.h b/libc/src/string/memory_utils/aarch64/inline_memset.h
index 79d7aab278da44..91512acce6fc07 100644
--- a/libc/src/string/memory_utils/aarch64/inline_memset.h
+++ b/libc/src/string/memory_utils/aarch64/inline_memset.h
@@ -17,7 +17,6 @@
 
 namespace LIBC_NAMESPACE {
 
-template <bool OPAQUE_VALUE = false>
 [[maybe_unused]] LIBC_INLINE static void
 inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
   static_assert(aarch64::kNeon, "aarch64 supports vector types");
@@ -46,7 +45,7 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
     generic::Memset<uint256_t>::tail(dst, value, count);
     return;
   }
-  if (!OPAQUE_VALUE && count >= 448 && value == 0 && aarch64::neon::hasZva()) {
+  if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
     generic::Memset<uint512_t>::block(dst, 0);
     align_to_next_boundary<64>(dst, count);
     return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
diff --git a/libc/src/string/memory_utils/inline_memset.h b/libc/src/string/memory_utils/inline_memset.h
index 0dd698a325bb12..1c07c1ca4bffc0 100644
--- a/libc/src/string/memory_utils/inline_memset.h
+++ b/libc/src/string/memory_utils/inline_memset.h
@@ -36,16 +36,8 @@
 
 namespace LIBC_NAMESPACE {
 
-template <bool OPAQUE_VALUE = false>
 LIBC_INLINE static void inline_memset(void *dst, uint8_t value, size_t count) {
-#if LIBC_TARGET_ARCH_IS_AARCH64
-  // The AArch64 implementation has an additional template parameter. It
-  // may uses dc zva to zero memory.
-  LIBC_SRC_STRING_MEMORY_UTILS_MEMSET<OPAQUE_VALUE>(reinterpret_cast<Ptr>(dst),
-                                                    value, count);
-#else
   LIBC_SRC_STRING_MEMORY_UTILS_MEMSET(reinterpret_cast<Ptr>(dst), value, count);
-#endif
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 3b641e297d5b14..348dad293aee81 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -18,7 +18,7 @@ namespace LIBC_NAMESPACE {
   inline_memset<true>(dst, static_cast<uint8_t>(value), count);
   // avoid dead store elimination
   // The asm itself should also be sufficient to behave as a compiler barrier.
-  asm volatile("" : : "r"(dst) : "memory");
+  asm("" : : "r"(dst) : "memory");
   return dst;
 }
 

>From 364bb4b6a24555e407e217c0c30cb7cca4e9379a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Thu, 7 Mar 2024 10:18:39 -0500
Subject: [PATCH 6/6] remove template arg

---
 libc/src/string/memset_explicit.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/string/memset_explicit.cpp b/libc/src/string/memset_explicit.cpp
index 348dad293aee81..a8656d1e791e84 100644
--- a/libc/src/string/memset_explicit.cpp
+++ b/libc/src/string/memset_explicit.cpp
@@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE {
 [[gnu::noinline]] LLVM_LIBC_FUNCTION(void *, memset_explicit,
                                      (void *dst, int value, size_t count)) {
   // Use the inline memset function to set the memory.
-  inline_memset<true>(dst, static_cast<uint8_t>(value), count);
+  inline_memset(dst, static_cast<uint8_t>(value), count);
   // avoid dead store elimination
   // The asm itself should also be sufficient to behave as a compiler barrier.
   asm("" : : "r"(dst) : "memory");



More information about the libc-commits mailing list