[libc-commits] [libc] 8b87c3d - [libc] Add optimized memset for AArch64
Andre Vieira via libc-commits
libc-commits at lists.llvm.org
Thu Sep 23 01:21:59 PDT 2021
Author: Andre Vieira
Date: 2021-09-23T09:19:47+01:00
New Revision: 8b87c3d5736730cda1d8856098621029b759f3d1
URL: https://github.com/llvm/llvm-project/commit/8b87c3d5736730cda1d8856098621029b759f3d1
DIFF: https://github.com/llvm/llvm-project/commit/8b87c3d5736730cda1d8856098621029b759f3d1.diff
LOG: [libc] Add optimized memset for AArch64
Differential Revision: https://reviews.llvm.org/D107848
Added:
libc/src/string/aarch64/memset.cpp
Modified:
libc/src/string/CMakeLists.txt
libc/src/string/memory_utils/elements_aarch64.h
Removed:
################################################################################
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index b652befe3976c..aa22fa08e9f4c 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -341,7 +341,7 @@ endif()
function(add_memset memset_name)
add_implementation(memset ${memset_name}
- SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp
+ SRCS ${MEMSET_SRC}
HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h
DEPENDS
.memory_utils.memory_utils
@@ -353,13 +353,20 @@ function(add_memset memset_name)
endfunction()
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
+ set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) # x86 keeps building the generic memset.cpp; add_memset reads MEMSET_SRC.
add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memset(memset)
+elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
+ set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp) # AArch64 builds the dedicated source added by this commit.
+ add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
+ COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
+ add_memset(memset COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") # NOTE(review): presumably a threshold of 0 turns off LLVM tail merging for this file - confirm.
else()
+ set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) # Every other target uses the generic memset.cpp.
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memset(memset)
endif()
diff --git a/libc/src/string/aarch64/memset.cpp b/libc/src/string/aarch64/memset.cpp
new file mode 100644
index 0000000000000..fa66ffe1cc993
--- /dev/null
+++ b/libc/src/string/aarch64/memset.cpp
@@ -0,0 +1,49 @@
+//===-- Implementation of memset ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/memset.h"
+#include "src/__support/common.h"
+#include "src/string/memory_utils/memset_utils.h"
+
+namespace __llvm_libc {
+
+using namespace __llvm_libc::aarch64_memset;
+
+inline static void AArch64Memset(char *dst, int value, size_t count) { // Dispatches on `count` to a SplatSet variant; the helper templates are defined outside this diff.
+ if (count == 0) // Nothing to write.
+ return;
+ if (count <= 3) { // 1..3 bytes.
+ SplatSet<_1>(dst, value);
+ if (count > 1) // 2 or 3 bytes: Tail<_2> presumably covers the last two bytes - confirm against Tail.
+ SplatSet<Tail<_2>>(dst, value, count);
+ return;
+ }
+ if (count <= 8) // 4..8 bytes.
+ return SplatSet<HeadTail<_4>>(dst, value, count);
+ if (count <= 16) // 9..16 bytes.
+ return SplatSet<HeadTail<_8>>(dst, value, count);
+ if (count <= 32) // 17..32 bytes.
+ return SplatSet<HeadTail<_16>>(dst, value, count);
+ if (count <= 96) { // 33..96 bytes.
+ SplatSet<_32>(dst, value);
+ if (count <= 64) // 33..64 bytes: one more Tail<_32> call finishes the job.
+ return SplatSet<Tail<_32>>(dst, value, count);
+ SplatSet<Skip<32>::Then<_32>>(dst, value); // 65..96 bytes: a second _32 element after Skip<32>, then the tail.
+ SplatSet<Tail<_32>>(dst, value, count);
+ return;
+ }
+ if (count < 448 || value != 0 || !AArch64ZVA(dst, count)) // count > 96 here; AArch64ZVA is only attempted when count >= 448 and value == 0.
+ return SplatSet<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, value, count); // Used when the ZVA attempt is skipped or AArch64ZVA returns false.
+}
+
+LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) { // memset entry point, declared through the LLVM_LIBC_FUNCTION macro (defined outside this diff).
+ AArch64Memset((char *)dst, value, count); // All of the work happens in the size-dispatching helper.
+ return dst; // The caller's pointer is handed back unchanged.
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/memory_utils/elements_aarch64.h b/libc/src/string/memory_utils/elements_aarch64.h
index 7f722afbb6a96..366efc181e9a7 100644
--- a/libc/src/string/memory_utils/elements_aarch64.h
+++ b/libc/src/string/memory_utils/elements_aarch64.h
@@ -18,6 +18,54 @@
#endif
namespace __llvm_libc {
+namespace aarch64_memset {
+#ifdef __ARM_NEON
+struct Splat8 { // 8-byte element for NEON builds: a single vector store of `value` in every lane.
+ static constexpr size_t kSize = 8; // Number of bytes one SplatSet call writes.
+ static void SplatSet(char *dst, const unsigned char value) {
+ vst1_u8((uint8_t *)dst, vdup_n_u8(value)); // vdup_n_u8 broadcasts the byte to 8 lanes; vst1_u8 stores them at dst.
+ }
+};
+
+struct Splat16 { // 16-byte element for NEON builds: a single quad-register store of `value` in every lane.
+ static constexpr size_t kSize = 16; // Number of bytes one SplatSet call writes.
+ static void SplatSet(char *dst, const unsigned char value) {
+ vst1q_u8((uint8_t *)dst, vdupq_n_u8(value)); // vdupq_n_u8 broadcasts the byte to 16 lanes; vst1q_u8 stores them at dst.
+ }
+};
+
+using _8 = Splat8; // NEON build: 8-byte element is the vector store above.
+using _16 = Splat16; // NEON build: 16-byte element is the quad-register store above.
+#else // No NEON: fall back to the scalar element types.
+using _8 = __llvm_libc::scalar::_8;
+using _16 = Repeated<_8, 2>; // Presumably two consecutive _8 elements; Repeated is defined outside this diff.
+#endif // __ARM_NEON
+
+using _1 = __llvm_libc::scalar::_1; // Sizes below 8 always come from the scalar namespace.
+using _2 = __llvm_libc::scalar::_2;
+using _3 = __llvm_libc::scalar::_3;
+using _4 = __llvm_libc::scalar::_4;
+using _32 = Chained<_16, _16>; // Larger elements are composed from smaller ones; Chained is defined outside this diff.
+using _64 = Chained<_32, _32>;
+
+struct ZVA { // Element that zeroes memory with the AArch64 `dc zva` instruction.
+ static constexpr size_t kSize = 64; // Block size assumed here; AArch64ZVA checks DCZID_EL0 before using this type.
+ static void SplatSet(char *dst, const unsigned char value) { // `value` is never read: the instruction only writes zeros.
+ asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory"); // The "memory" clobber tells the compiler that memory is modified.
+ }
+};
+
+inline static bool AArch64ZVA(char *dst, size_t count) { // Tries to zero `count` bytes at dst via DC ZVA; returns false, writing nothing, when that is not usable.
+ uint64_t zva_val;
+ asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val)); // Read the DCZID_EL0 system register.
+ if ((zva_val & 31) != 4) // Low 5 bits must equal 4: per the Arm ARM, DZP (bit 4) clear and BS (bits 3:0) == 4, i.e. a 64-byte block.
+ return false;
+ SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count); // Presumably aligns dst to 64 bytes, then loops with ZVA - confirm against Align/Loop.
+ return true;
+}
+
+} // namespace aarch64_memset
+
namespace aarch64 {
using _1 = __llvm_libc::scalar::_1;
More information about the libc-commits
mailing list