[libc-commits] [libc] 93ac449 - [libc] Make string functions buildable with GCC

Sun Dec 18 07:01:11 PST 2022

Author: Guillaume Chatelet
Date: 2022-12-18T14:56:01Z
New Revision: 93ac449369be8cb085131b6700fc99b2ff2d9c7c

URL: https://github.com/llvm/llvm-project/commit/93ac449369be8cb085131b6700fc99b2ff2d9c7c
DIFF: https://github.com/llvm/llvm-project/commit/93ac449369be8cb085131b6700fc99b2ff2d9c7c.diff

LOG: [libc] Make string functions buildable with GCC

Differential Revision: https://reviews.llvm.org/D139939

Added: 
    

Modified: 
    libc/src/__support/compiler_features.h
    libc/src/string/memory_utils/bcmp_implementations.h
    libc/src/string/memory_utils/memcmp_implementations.h
    libc/src/string/memory_utils/memcpy_implementations.h
    libc/src/string/memory_utils/memmove_implementations.h
    libc/src/string/memory_utils/memset_implementations.h
    libc/src/string/memory_utils/op_builtin.h
    libc/src/string/memory_utils/op_generic.h
    libc/src/string/memory_utils/op_x86.h

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/compiler_features.h b/libc/src/__support/compiler_features.h
index a30d2a212d32a..fed57592067b6 100644

--- a/libc/src/__support/compiler_features.h
+++ b/libc/src/__support/compiler_features.h
@@ -38,4 +38,12 @@
 #define LLVM_LIBC_HAS_FEATURE(FEATURE) 0
 #endif
 
+#if defined(LLVM_LIBC_COMPILER_CLANG)
+#define LLVM_LIBC_LOOP_NOUNROLL _Pragma("nounroll")
+#elif defined(LLVM_LIBC_COMPILER_GCC)
+#define LLVM_LIBC_LOOP_NOUNROLL _Pragma("GCC unroll 0")
+#else
+#define LLVM_LIBC_LOOP_NOUNROLL
+#endif
+
 #endif // LLVM_LIBC_SUPPORT_COMPILER_FEATURES_H

diff  --git a/libc/src/string/memory_utils/bcmp_implementations.h b/libc/src/string/memory_utils/bcmp_implementations.h
index e7ded19d12066..18a4ab829851e 100644
--- a/libc/src/string/memory_utils/bcmp_implementations.h
+++ b/libc/src/string/memory_utils/bcmp_implementations.h
@@ -22,7 +22,7 @@ namespace __llvm_libc {
 
 [[maybe_unused]] static inline BcmpReturnType
 inline_bcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
   for (size_t offset = 0; offset < count; ++offset)
     if (auto value = generic::Bcmp<1>::block(p1 + offset, p2 + offset))
       return value;

diff  --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h
index 33fbd7ca92fb0..1dac6e011c8c1 100644
--- a/libc/src/string/memory_utils/memcmp_implementations.h
+++ b/libc/src/string/memory_utils/memcmp_implementations.h
@@ -22,7 +22,7 @@
 namespace __llvm_libc {
 [[maybe_unused]] static inline MemcmpReturnType
 inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
   for (size_t offset = 0; offset < count; ++offset)
     if (auto value = generic::Memcmp<1>::block(p1 + offset, p2 + offset))
       return value;
@@ -83,6 +83,7 @@ inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
   }
   return x86::avx512bw::Memcmp<64>::loop_and_tail(p1, p2, count);
 }
+
 #endif // defined(LLVM_LIBC_ARCH_X86)
 
 #if defined(LLVM_LIBC_ARCH_AARCH64)

diff  --git a/libc/src/string/memory_utils/memcpy_implementations.h b/libc/src/string/memory_utils/memcpy_implementations.h
index 8d8ba6f4e9d6a..4372733a0f027 100644
--- a/libc/src/string/memory_utils/memcpy_implementations.h
+++ b/libc/src/string/memory_utils/memcpy_implementations.h
@@ -24,7 +24,7 @@ namespace __llvm_libc {
 [[maybe_unused]] static inline void
 inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
                             size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
   for (size_t offset = 0; offset < count; ++offset)
     builtin::Memcpy<1>::block(dst + offset, src + offset);
 }

diff  --git a/libc/src/string/memory_utils/memmove_implementations.h b/libc/src/string/memory_utils/memmove_implementations.h
index dfea5fa5900c4..7e26b36a86135 100644
--- a/libc/src/string/memory_utils/memmove_implementations.h
+++ b/libc/src/string/memory_utils/memmove_implementations.h
@@ -23,11 +23,11 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
   if ((count == 0) || (dst == src))
     return;
   if (dst < src) {
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
     for (size_t offset = 0; offset < count; ++offset)
       builtin::Memcpy<1>::block(dst + offset, src + offset);
   } else {
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
     for (ptrdiff_t offset = count - 1; offset >= 0; --offset)
       builtin::Memcpy<1>::block(dst + offset, src + offset);
   }

diff  --git a/libc/src/string/memory_utils/memset_implementations.h b/libc/src/string/memory_utils/memset_implementations.h
index 58779f7b1f88f..dbcc356bcdf07 100644
--- a/libc/src/string/memory_utils/memset_implementations.h
+++ b/libc/src/string/memory_utils/memset_implementations.h
@@ -22,7 +22,7 @@ namespace __llvm_libc {
 
 [[maybe_unused]] inline static void
 inline_memset_embedded_tiny(Ptr dst, uint8_t value, size_t count) {
-#pragma nounroll
+  LLVM_LIBC_LOOP_NOUNROLL
   for (size_t offset = 0; offset < count; ++offset)
     generic::Memset<1, 1>::block(dst + offset, value);
 }

diff  --git a/libc/src/string/memory_utils/op_builtin.h b/libc/src/string/memory_utils/op_builtin.h
index 68ae86242d645..ce33de3e64678 100644
--- a/libc/src/string/memory_utils/op_builtin.h
+++ b/libc/src/string/memory_utils/op_builtin.h
@@ -27,9 +27,9 @@ template <size_t Size> struct Memcpy {
 #ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
     return __builtin_memcpy_inline(dst, src, SIZE);
 #else
-    deferred_static_assert("Missing __builtin_memcpy_inline");
-    (void)dst;
-    (void)src;
+    // The codegen may be suboptimal.
+    for (size_t i = 0; i < Size; ++i)
+      dst[i] = src[i];
 #endif
   }
 

diff  --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index e21ea6c51b547..1603dbf4ebfd2 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -26,6 +26,7 @@
 #include "src/__support/CPP/array.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
+#include "src/__support/compiler_features.h"
 #include "src/__support/endian.h"
 #include "src/string/memory_utils/op_builtin.h"
 #include "src/string/memory_utils/utils.h"
@@ -71,9 +72,34 @@ template <typename T> struct ScalarType {
   }
 };
 
+// GCC can only take literals as __vector_size__ argument so we have to use
+// template specialization.
+template <size_t Size> struct VectorValueType {};
+template <> struct VectorValueType<1> {
+  using type = uint8_t __attribute__((__vector_size__(1)));
+};
+template <> struct VectorValueType<2> {
+  using type = uint8_t __attribute__((__vector_size__(2)));
+};
+template <> struct VectorValueType<4> {
+  using type = uint8_t __attribute__((__vector_size__(4)));
+};
+template <> struct VectorValueType<8> {
+  using type = uint8_t __attribute__((__vector_size__(8)));
+};
+template <> struct VectorValueType<16> {
+  using type = uint8_t __attribute__((__vector_size__(16)));
+};
+template <> struct VectorValueType<32> {
+  using type = uint8_t __attribute__((__vector_size__(32)));
+};
+template <> struct VectorValueType<64> {
+  using type = uint8_t __attribute__((__vector_size__(64)));
+};
+
 // Implements load, store and splat for vector types.
 template <size_t Size> struct VectorType {
-  using Type = uint8_t __attribute__((__vector_size__(Size)));
+  using Type = typename VectorValueType<Size>::type;
   static inline Type load(CPtr src) { return ::__llvm_libc::load<Type>(src); }
   static inline void store(Ptr dst, Type value) {
     ::__llvm_libc::store<Type>(dst, value);
@@ -434,7 +460,7 @@ template <size_t Size, size_t MaxSize> struct Memmove {
     const size_t tail_offset = count - Size;
     const auto tail_value = T::load(src + tail_offset);
     size_t offset = 0;
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
     do {
       block(dst + offset, src + offset);
       offset += Size;
@@ -460,7 +486,7 @@ template <size_t Size, size_t MaxSize> struct Memmove {
     static_assert(Size > 1, "a loop of size 1 does not need tail");
     const auto head_value = T::load(src);
     ptrdiff_t offset = count - Size;
-#pragma nounroll
+    LLVM_LIBC_LOOP_NOUNROLL
     do {
       block(dst + offset, src + offset);
       offset -= Size;

diff  --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h
index f68af00cca874..b2355d9555eb5 100644
--- a/libc/src/string/memory_utils/op_x86.h
+++ b/libc/src/string/memory_utils/op_x86.h
@@ -99,17 +99,24 @@ template <size_t Size, size_t BlockSize, auto BlockBcmp> struct BcmpImpl {
 
 namespace sse2 {
 static inline BcmpReturnType bcmp16(CPtr p1, CPtr p2) {
+#if defined(__SSE2__)
   using T = char __attribute__((__vector_size__(16)));
   // A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
   const int mask =
       _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2)));
   return static_cast<uint32_t>(mask);
+#else
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__SSE2__)
 }
 template <size_t Size> using Bcmp = BcmpImpl<Size, 16, bcmp16>;
 } // namespace sse2
 
 namespace avx2 {
 static inline BcmpReturnType bcmp32(CPtr p1, CPtr p2) {
+#if defined(__AVX2__)
   using T = char __attribute__((__vector_size__(32)));
   // A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
   const int mask =
@@ -117,17 +124,29 @@ static inline BcmpReturnType bcmp32(CPtr p1, CPtr p2) {
   // _mm256_movemask_epi8 returns an int but it is to be interpreted as a 32-bit
   // mask.
   return static_cast<uint32_t>(mask);
+#else
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__AVX2__)
 }
 template <size_t Size> using Bcmp = BcmpImpl<Size, 32, bcmp32>;
 } // namespace avx2
 
 namespace avx512bw {
 static inline BcmpReturnType bcmp64(CPtr p1, CPtr p2) {
+#if defined(__AVX512BW__)
   using T = char __attribute__((__vector_size__(64)));
   // A mask indicating which bytes differ after loading 64 bytes from p1 and p2.
-  const uint64_t mask = _mm512_cmpneq_epi8_mask(load<T>(p1), load<T>(p2));
+  const uint64_t mask = _mm512_cmpneq_epi8_mask(
+      cpp::bit_cast<__m512i>(load<T>(p1)), cpp::bit_cast<__m512i>(load<T>(p2)));
   const bool mask_is_set = mask != 0;
   return static_cast<uint32_t>(mask_is_set);
+#else
+  (void)p1;
+  (void)p2;
+  return BcmpReturnType::ZERO();
+#endif // defined(__AVX512BW__)
 }
 template <size_t Size> using Bcmp = BcmpImpl<Size, 64, bcmp64>;
 } // namespace avx512bw
@@ -192,35 +211,55 @@ struct MemcmpImpl {
 
 namespace sse2 {
 static inline MemcmpReturnType memcmp16(CPtr p1, CPtr p2) {
+#if defined(__SSE2__)
   using T = char __attribute__((__vector_size__(16)));
   // A mask indicating which bytes differ after loading 16 bytes from p1 and p2.
   if (int mask =
           _mm_movemask_epi8(cpp::bit_cast<__m128i>(load<T>(p1) != load<T>(p2))))
     return char_diff_no_zero(p1, p2, mask);
   return MemcmpReturnType::ZERO();
+#else
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__SSE2__)
 }
 template <size_t Size> using Memcmp = MemcmpImpl<Size, 16, memcmp16, bcmp16>;
 } // namespace sse2
 
 namespace avx2 {
 static inline MemcmpReturnType memcmp32(CPtr p1, CPtr p2) {
+#if defined(__AVX2__)
   using T = char __attribute__((__vector_size__(32)));
   // A mask indicating which bytes differ after loading 32 bytes from p1 and p2.
   if (int mask = _mm256_movemask_epi8(
           cpp::bit_cast<__m256i>(load<T>(p1) != load<T>(p2))))
     return char_diff_no_zero(p1, p2, mask);
   return MemcmpReturnType::ZERO();
+#else
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__AVX2__)
 }
 template <size_t Size> using Memcmp = MemcmpImpl<Size, 32, memcmp32, bcmp32>;
 } // namespace avx2
 
 namespace avx512bw {
 static inline MemcmpReturnType memcmp64(CPtr p1, CPtr p2) {
+#if defined(__AVX512BW__)
   using T = char __attribute__((__vector_size__(64)));
   // A mask indicating which bytes differ after loading 64 bytes from p1 and p2.
-  if (uint64_t mask = _mm512_cmpneq_epi8_mask(load<T>(p1), load<T>(p2)))
+  if (uint64_t mask =
+          _mm512_cmpneq_epi8_mask(cpp::bit_cast<__m512i>(load<T>(p1)),
+                                  cpp::bit_cast<__m512i>(load<T>(p2))))
     return char_diff_no_zero(p1, p2, mask);
   return MemcmpReturnType::ZERO();
+#else
+  (void)p1;
+  (void)p2;
+  return MemcmpReturnType::ZERO();
+#endif // defined(__AVX512BW__)
 }
 template <size_t Size> using Memcmp = MemcmpImpl<Size, 64, memcmp64, bcmp64>;
 } // namespace avx512bw