[libc-commits] [libc] 8b87c3d - [libc] Add optimized memset for AArch64

Andre Vieira via libc-commits libc-commits at lists.llvm.org
Thu Sep 23 01:21:59 PDT 2021


Author: Andre Vieira
Date: 2021-09-23T09:19:47+01:00
New Revision: 8b87c3d5736730cda1d8856098621029b759f3d1

URL: https://github.com/llvm/llvm-project/commit/8b87c3d5736730cda1d8856098621029b759f3d1
DIFF: https://github.com/llvm/llvm-project/commit/8b87c3d5736730cda1d8856098621029b759f3d1.diff

LOG: [libc] Add optimized memset for AArch64

Differential Revision: https://reviews.llvm.org/D107848

Added: 
    libc/src/string/aarch64/memset.cpp

Modified: 
    libc/src/string/CMakeLists.txt
    libc/src/string/memory_utils/elements_aarch64.h

Removed: 
    


################################################################################
diff  --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index b652befe3976c..aa22fa08e9f4c 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -341,7 +341,7 @@ endif()
 
 function(add_memset memset_name)
   add_implementation(memset ${memset_name}
-    SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp
+    SRCS ${MEMSET_SRC}
     HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h
     DEPENDS
       .memory_utils.memory_utils
@@ -353,13 +353,20 @@ function(add_memset memset_name)
 endfunction()
 
 if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
+  set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) # Read by add_memset() as its SRCS.
   add_memset(memset_x86_64_opt_sse2   COMPILE_OPTIONS -march=k8             REQUIRE SSE2)
   add_memset(memset_x86_64_opt_sse4   COMPILE_OPTIONS -march=nehalem        REQUIRE SSE4_2)
   add_memset(memset_x86_64_opt_avx2   COMPILE_OPTIONS -march=haswell        REQUIRE AVX2)
   add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
   add_memset(memset_opt_host          COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memset(memset)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) # AArch64 builds its own memset source.
+  set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp)
+  add_memset(memset_opt_host          COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
+                                      COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") # SHELL: keeps the two tokens paired.
+  add_memset(memset                   COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
 else()
+  set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) # Generic source for every other target.
   add_memset(memset_opt_host          COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
   add_memset(memset)
 endif()

diff  --git a/libc/src/string/aarch64/memset.cpp b/libc/src/string/aarch64/memset.cpp
new file mode 100644
index 0000000000000..fa66ffe1cc993
--- /dev/null
+++ b/libc/src/string/aarch64/memset.cpp
@@ -0,0 +1,49 @@
+//===-- Implementation of memset ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memset.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/memset_utils.h"
+
+namespace __llvm_libc {
+
+using namespace __llvm_libc::aarch64_memset;
+// memset body for AArch64: dispatches on `count` to a SplatSet strategy.
+inline static void AArch64Memset(char *dst, int value, size_t count) {
+  if (count == 0) // Nothing to write.
+    return;
+  if (count <= 3) { // count is 1, 2 or 3 here.
+    SplatSet<_1>(dst, value);
+    if (count > 1) // 2 or 3 bytes: add the Tail<_2> store.
+      SplatSet<Tail<_2>>(dst, value, count);
+    return;
+  }
+  if (count <= 8) // 4..8 bytes.
+    return SplatSet<HeadTail<_4>>(dst, value, count);
+  if (count <= 16) // 9..16 bytes.
+    return SplatSet<HeadTail<_8>>(dst, value, count);
+  if (count <= 32) // 17..32 bytes.
+    return SplatSet<HeadTail<_16>>(dst, value, count);
+  if (count <= 96) { // 33..96 bytes.
+    SplatSet<_32>(dst, value);
+    if (count <= 64) // 33..64 bytes: the Tail<_32> store is the last one.
+      return SplatSet<Tail<_32>>(dst, value, count);
+    SplatSet<Skip<32>::Then<_32>>(dst, value); // 65..96 bytes.
+    SplatSet<Tail<_32>>(dst, value, count);
+    return;
+  } // Beyond 96 bytes: AArch64ZVA is tried only for zero fills of >= 448.
+  if (count < 448 || value != 0 || !AArch64ZVA(dst, count))
+    return SplatSet<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, value, count);
+} // Falls through only when AArch64ZVA() returned true and did the fill.
+// libc memset entry point: delegates to AArch64Memset, then returns dst.
+LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) {
+  AArch64Memset((char *)dst, value, count);
+  return dst; // memset returns its first argument.
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/string/memory_utils/elements_aarch64.h b/libc/src/string/memory_utils/elements_aarch64.h
index 7f722afbb6a96..366efc181e9a7 100644
--- a/libc/src/string/memory_utils/elements_aarch64.h
+++ b/libc/src/string/memory_utils/elements_aarch64.h
@@ -18,6 +18,54 @@
 #endif
 
 namespace __llvm_libc {
+namespace aarch64_memset { // Element types used by the AArch64 memset.
+#ifdef __ARM_NEON // NEON build: 8- and 16-byte splats use vector stores.
+struct Splat8 {
+  static constexpr size_t kSize = 8; // Bytes written by SplatSet.
+  static void SplatSet(char *dst, const unsigned char value) {
+    vst1_u8((uint8_t *)dst, vdup_n_u8(value)); // Broadcast value, store 8.
+  }
+};
+// Same as Splat8 but with the 128-bit (q) intrinsics: 16 bytes per store.
+struct Splat16 {
+  static constexpr size_t kSize = 16; // Bytes written by SplatSet.
+  static void SplatSet(char *dst, const unsigned char value) {
+    vst1q_u8((uint8_t *)dst, vdupq_n_u8(value)); // Broadcast value, store 16.
+  }
+};
+// Under NEON, _8 and _16 name the vector splat types above.
+using _8 = Splat8;
+using _16 = Splat16;
+#else // No NEON: fall back to the scalar element types.
+using _8 = __llvm_libc::scalar::_8;
+using _16 = Repeated<_8, 2>;
+#endif // __ARM_NEON
+// Sizes below 8 always use scalar types; _32 and _64 are chained from _16.
+using _1 = __llvm_libc::scalar::_1;
+using _2 = __llvm_libc::scalar::_2;
+using _3 = __llvm_libc::scalar::_3;
+using _4 = __llvm_libc::scalar::_4;
+using _32 = Chained<_16, _16>;
+using _64 = Chained<_32, _32>;
+// Element that issues one `dc zva` at dst; declares a 64-byte footprint.
+struct ZVA {
+  static constexpr size_t kSize = 64;
+  static void SplatSet(char *dst, const unsigned char value) { // value unused.
+    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
+  }
+};
+// Zero-fills count bytes at dst via ZVA; false (no writes) if unsupported.
+inline static bool AArch64ZVA(char *dst, size_t count) {
+  uint64_t zva_val;
+  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val)); // Read DCZID_EL0.
+  if ((zva_val & 31) != 4) // Need DZP (bit 4) clear and BS (bits 3:0) == 4.
+    return false;
+  SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
+  return true;
+}
+
+} // namespace aarch64_memset
+
 namespace aarch64 {
 
 using _1 = __llvm_libc::scalar::_1;


        


More information about the libc-commits mailing list