[clang] [libc] Adding a version of memset with software prefetching (PR #70493)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 30 10:36:33 PDT 2023
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {clang-format}-->
:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
git-clang-format --diff 08e9c462eaa900449c2250c7ae84d9d303de9fb4 9fe0041c2bb8ba1d522538c79ac1ebae7d0632bb -- libc/src/string/memory_utils/op_generic.h libc/src/string/memory_utils/x86_64/inline_memset.h
``````````
</details>
<details>
<summary>
View the diff from clang-format here.
</summary>
``````````diff
diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index 12eeb65a1edc..af6a814be154 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -49,11 +49,11 @@ using generic_v512 = uint8_t __attribute__((__vector_size__(64)));
} // namespace LIBC_NAMESPACE
namespace sw_prefetch {
- // Size of a cacheline for software prefetching
+// Size of a cacheline for software prefetching
static constexpr size_t kCachelineSize = 64;
- // prefetch for write
+// prefetch for write
static inline void PrefetchW(CPtr dst) { __builtin_prefetch(dst, 1, 3); }
-}
+} // namespace sw_prefetch
namespace LIBC_NAMESPACE::generic {
diff --git a/libc/src/string/memory_utils/x86_64/inline_memset.h b/libc/src/string/memory_utils/x86_64/inline_memset.h
index bc7a6162f77b..9000aa03019d 100644
--- a/libc/src/string/memory_utils/x86_64/inline_memset.h
+++ b/libc/src/string/memory_utils/x86_64/inline_memset.h
@@ -40,56 +40,56 @@ LIBC_INLINE_VAR constexpr bool kUseSoftwarePrefetchingMemset =
using uint512_t = cpp::array<uint64_t, 8>;
#endif
-[[maybe_unused]] LIBC_INLINE static void
-inline_memset_x86_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
- sw_prefetch::PrefetchW(dst + generic::kCachelineSize);
- if (count <= 128)
- return generic::Memset<uint512_t>::head_tail(dst, value, count);
- sw_prefetch::PrefetchW(dst + generic::kCachelineSize * 2);
- // Aligned loop
- generic::Memset<uint256_t>::block(dst, value);
- align_to_next_boundary<32>(dst, count);
- if (count <= 192) {
- return Memset<uint256_t>::loop_and_tail(dst, value, count);
- } else {
- // Warm up memset
+ [[maybe_unused]] LIBC_INLINE static void
+ inline_memset_x86_sw_prefetching(Ptr dst, uint8_t value, size_t count) {
+ sw_prefetch::PrefetchW(dst + generic::kCachelineSize);
+ if (count <= 128)
+ return generic::Memset<uint512_t>::head_tail(dst, value, count);
+ sw_prefetch::PrefetchW(dst + generic::kCachelineSize * 2);
+ // Aligned loop
generic::Memset<uint256_t>::block(dst, value);
- generic::Memset<uint128_t>::block(dst + 64, value);
- return generic::Memset<uint256_t>::loop_and_tail_prefetch<320, 128, 96>(dst, value,
- count);
+ align_to_next_boundary<32>(dst, count);
+ if (count <= 192) {
+ return Memset<uint256_t>::loop_and_tail(dst, value, count);
+ } else {
+ // Warm up memset
+ generic::Memset<uint256_t>::block(dst, value);
+ generic::Memset<uint128_t>::block(dst + 64, value);
+ return generic::Memset<uint256_t>::loop_and_tail_prefetch<320, 128, 96>(
+ dst, value, count);
+ }
}
-}
-[[maybe_unused]] LIBC_INLINE static void
-inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
- if (count == 0)
- return;
- if (count == 1)
- return generic::Memset<uint8_t>::block(dst, value);
- if (count == 2)
- return generic::Memset<uint16_t>::block(dst, value);
- if (count == 3)
- return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
- if (count <= 8)
- return generic::Memset<uint32_t>::head_tail(dst, value, count);
- if (count <= 16)
- return generic::Memset<uint64_t>::head_tail(dst, value, count);
- if (count <= 32)
- return generic::Memset<uint128_t>::head_tail(dst, value, count);
- if (count <= 64)
- return generic::Memset<uint256_t>::head_tail(dst, value, count);
- if constexpr (x86::kUseSoftwarePrefetchingMemset) {
- return inline_memset_x86_sw_prefetching(dst, value, count);
- }
- if (count <= 128)
- return generic::Memset<uint512_t>::head_tail(dst, value, count);
- // Aligned loop
- generic::Memset<uint256_t>::block(dst, value);
- align_to_next_boundary<32>(dst, count);
- else {
- return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
+ [[maybe_unused]] LIBC_INLINE static void
+ inline_memset_x86(Ptr dst, uint8_t value, size_t count) {
+ if (count == 0)
+ return;
+ if (count == 1)
+ return generic::Memset<uint8_t>::block(dst, value);
+ if (count == 2)
+ return generic::Memset<uint16_t>::block(dst, value);
+ if (count == 3)
+ return generic::MemsetSequence<uint16_t, uint8_t>::block(dst, value);
+ if (count <= 8)
+ return generic::Memset<uint32_t>::head_tail(dst, value, count);
+ if (count <= 16)
+ return generic::Memset<uint64_t>::head_tail(dst, value, count);
+ if (count <= 32)
+ return generic::Memset<uint128_t>::head_tail(dst, value, count);
+ if (count <= 64)
+ return generic::Memset<uint256_t>::head_tail(dst, value, count);
+ if constexpr (x86::kUseSoftwarePrefetchingMemset) {
+ return inline_memset_x86_sw_prefetching(dst, value, count);
+ }
+ if (count <= 128)
+ return generic::Memset<uint512_t>::head_tail(dst, value, count);
+ // Aligned loop
+ generic::Memset<uint256_t>::block(dst, value);
+ align_to_next_boundary<32>(dst, count);
+ else {
+ return generic::Memset<uint256_t>::loop_and_tail(dst, value, count);
+ }
}
-}
} // namespace LIBC_NAMESPACE
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_MEMSET_H
``````````
</details>
https://github.com/llvm/llvm-project/pull/70493
More information about the cfe-commits mailing list